fsspec 2023.10.0__py3-none-any.whl → 2024.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/_version.py +3 -3
- fsspec/archive.py +4 -4
- fsspec/asyn.py +43 -53
- fsspec/caching.py +1 -1
- fsspec/callbacks.py +98 -12
- fsspec/compression.py +3 -3
- fsspec/core.py +16 -3
- fsspec/exceptions.py +0 -4
- fsspec/generic.py +11 -4
- fsspec/gui.py +4 -3
- fsspec/implementations/arrow.py +9 -0
- fsspec/implementations/cache_mapper.py +2 -6
- fsspec/implementations/cached.py +92 -18
- fsspec/implementations/data.py +48 -0
- fsspec/implementations/dbfs.py +14 -4
- fsspec/implementations/dirfs.py +6 -0
- fsspec/implementations/ftp.py +18 -13
- fsspec/implementations/github.py +17 -5
- fsspec/implementations/http.py +42 -51
- fsspec/implementations/libarchive.py +2 -3
- fsspec/implementations/local.py +11 -4
- fsspec/implementations/memory.py +2 -2
- fsspec/implementations/reference.py +127 -56
- fsspec/implementations/sftp.py +6 -5
- fsspec/implementations/smb.py +0 -1
- fsspec/implementations/tar.py +2 -1
- fsspec/implementations/webhdfs.py +46 -5
- fsspec/implementations/zip.py +11 -3
- fsspec/parquet.py +3 -5
- fsspec/registry.py +2 -1
- fsspec/spec.py +51 -61
- fsspec/tests/abstract/common.py +5 -5
- fsspec/tests/abstract/copy.py +21 -7
- fsspec/tests/abstract/put.py +21 -7
- fsspec/transaction.py +8 -4
- fsspec/utils.py +114 -1
- {fsspec-2023.10.0.dist-info → fsspec-2024.2.0.dist-info}/METADATA +1 -2
- fsspec-2024.2.0.dist-info/RECORD +54 -0
- {fsspec-2023.10.0.dist-info → fsspec-2024.2.0.dist-info}/WHEEL +1 -1
- fsspec-2023.10.0.dist-info/RECORD +0 -53
- {fsspec-2023.10.0.dist-info → fsspec-2024.2.0.dist-info}/LICENSE +0 -0
- {fsspec-2023.10.0.dist-info → fsspec-2024.2.0.dist-info}/top_level.txt +0 -0
fsspec/parquet.py
CHANGED
|
@@ -131,10 +131,8 @@ def open_parquet_file(
|
|
|
131
131
|
cache_type="parts",
|
|
132
132
|
cache_options={
|
|
133
133
|
**options,
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
"strict": strict,
|
|
137
|
-
},
|
|
134
|
+
"data": data.get(fn, {}),
|
|
135
|
+
"strict": strict,
|
|
138
136
|
},
|
|
139
137
|
**kwargs,
|
|
140
138
|
)
|
|
@@ -338,7 +336,7 @@ def _transfer_ranges(fs, blocks, paths, starts, ends):
|
|
|
338
336
|
|
|
339
337
|
def _add_header_magic(data):
|
|
340
338
|
# Add b"PAR1" to file headers
|
|
341
|
-
for
|
|
339
|
+
for path in list(data.keys()):
|
|
342
340
|
add_magic = True
|
|
343
341
|
for k in data[path].keys():
|
|
344
342
|
if k[0] == 0 and k[1] >= 4:
|
fsspec/registry.py
CHANGED
|
@@ -57,9 +57,10 @@ def register_implementation(name, cls, clobber=False, errtxt=None):
|
|
|
57
57
|
_registry[name] = cls
|
|
58
58
|
|
|
59
59
|
|
|
60
|
-
# protocols mapped to the class which implements them. This dict can
|
|
60
|
+
# protocols mapped to the class which implements them. This dict can be
|
|
61
61
|
# updated with register_implementation
|
|
62
62
|
known_implementations = {
|
|
63
|
+
"data": {"class": "fsspec.implementations.data.DataFileSystem"},
|
|
63
64
|
"file": {"class": "fsspec.implementations.local.LocalFileSystem"},
|
|
64
65
|
"local": {"class": "fsspec.implementations.local.LocalFileSystem"},
|
|
65
66
|
"memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"},
|
fsspec/spec.py
CHANGED
|
@@ -11,12 +11,13 @@ from glob import has_magic
|
|
|
11
11
|
from hashlib import sha256
|
|
12
12
|
from typing import ClassVar
|
|
13
13
|
|
|
14
|
-
from .callbacks import _DEFAULT_CALLBACK
|
|
14
|
+
from .callbacks import DEFAULT_CALLBACK
|
|
15
15
|
from .config import apply_config, conf
|
|
16
16
|
from .dircache import DirCache
|
|
17
17
|
from .transaction import Transaction
|
|
18
18
|
from .utils import (
|
|
19
19
|
_unstrip_protocol,
|
|
20
|
+
glob_translate,
|
|
20
21
|
isfilelike,
|
|
21
22
|
other_paths,
|
|
22
23
|
read_block,
|
|
@@ -109,6 +110,7 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
109
110
|
async_impl = False
|
|
110
111
|
mirror_sync_methods = False
|
|
111
112
|
root_marker = "" # For some FSs, may require leading '/' or other character
|
|
113
|
+
transaction_type = Transaction
|
|
112
114
|
|
|
113
115
|
#: Extra *class attributes* that should be considered when hashing.
|
|
114
116
|
_extra_tokenize_attributes = ()
|
|
@@ -235,20 +237,20 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
235
237
|
for the normal and exception cases.
|
|
236
238
|
"""
|
|
237
239
|
if self._transaction is None:
|
|
238
|
-
self._transaction = Transaction(self)
|
|
240
|
+
self._transaction = self.transaction_type(self)
|
|
239
241
|
return self._transaction
|
|
240
242
|
|
|
241
243
|
def start_transaction(self):
|
|
242
244
|
"""Begin write transaction for deferring files, non-context version"""
|
|
243
245
|
self._intrans = True
|
|
244
|
-
self._transaction = Transaction(self)
|
|
246
|
+
self._transaction = self.transaction_type(self)
|
|
245
247
|
return self.transaction
|
|
246
248
|
|
|
247
249
|
def end_transaction(self):
|
|
248
250
|
"""Finish write transaction, non-context version"""
|
|
249
251
|
self.transaction.complete()
|
|
250
252
|
self._transaction = None
|
|
251
|
-
# The invalid cache must be cleared after the
|
|
253
|
+
# The invalid cache must be cleared after the transaction is completed.
|
|
252
254
|
for path in self._invalidated_caches_in_transaction:
|
|
253
255
|
self.invalidate_cache(path)
|
|
254
256
|
self._invalidated_caches_in_transaction.clear()
|
|
@@ -551,10 +553,6 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
551
553
|
|
|
552
554
|
The `maxdepth` option is applied on the first `**` found in the path.
|
|
553
555
|
|
|
554
|
-
Search path names that contain embedded characters special to this
|
|
555
|
-
implementation of glob may not produce expected results;
|
|
556
|
-
e.g., ``foo/bar/*starredfilename*``.
|
|
557
|
-
|
|
558
556
|
kwargs are passed to ``ls``.
|
|
559
557
|
"""
|
|
560
558
|
if maxdepth is not None and maxdepth < 1:
|
|
@@ -562,8 +560,12 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
562
560
|
|
|
563
561
|
import re
|
|
564
562
|
|
|
565
|
-
|
|
563
|
+
seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
|
|
564
|
+
ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash
|
|
566
565
|
path = self._strip_protocol(path)
|
|
566
|
+
append_slash_to_dirname = ends_with_sep or path.endswith(
|
|
567
|
+
tuple(sep + "**" for sep in seps)
|
|
568
|
+
)
|
|
567
569
|
idx_star = path.find("*") if path.find("*") >= 0 else len(path)
|
|
568
570
|
idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
|
|
569
571
|
idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
|
|
@@ -573,11 +575,11 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
573
575
|
detail = kwargs.pop("detail", False)
|
|
574
576
|
|
|
575
577
|
if not has_magic(path):
|
|
576
|
-
if self.exists(path):
|
|
578
|
+
if self.exists(path, **kwargs):
|
|
577
579
|
if not detail:
|
|
578
580
|
return [path]
|
|
579
581
|
else:
|
|
580
|
-
return {path: self.info(path)}
|
|
582
|
+
return {path: self.info(path, **kwargs)}
|
|
581
583
|
else:
|
|
582
584
|
if not detail:
|
|
583
585
|
return [] # glob of non-existent returns empty
|
|
@@ -600,47 +602,22 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
600
602
|
depth = None
|
|
601
603
|
|
|
602
604
|
allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
# See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
|
|
606
|
-
# for shell globbing details.
|
|
607
|
-
pattern = (
|
|
608
|
-
"^"
|
|
609
|
-
+ (
|
|
610
|
-
path.replace("\\", r"\\")
|
|
611
|
-
.replace(".", r"\.")
|
|
612
|
-
.replace("+", r"\+")
|
|
613
|
-
.replace("//", "/")
|
|
614
|
-
.replace("(", r"\(")
|
|
615
|
-
.replace(")", r"\)")
|
|
616
|
-
.replace("|", r"\|")
|
|
617
|
-
.replace("^", r"\^")
|
|
618
|
-
.replace("$", r"\$")
|
|
619
|
-
.replace("{", r"\{")
|
|
620
|
-
.replace("}", r"\}")
|
|
621
|
-
.rstrip("/")
|
|
622
|
-
.replace("?", ".")
|
|
623
|
-
)
|
|
624
|
-
+ "$"
|
|
625
|
-
)
|
|
626
|
-
pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern)
|
|
627
|
-
pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern)
|
|
628
|
-
pattern = re.sub("[*]", "[^/]*", pattern)
|
|
629
|
-
pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern)
|
|
630
|
-
pattern = re.sub("=DOUBLE_STARS=", ".*", pattern)
|
|
605
|
+
|
|
606
|
+
pattern = glob_translate(path + ("/" if ends_with_sep else ""))
|
|
631
607
|
pattern = re.compile(pattern)
|
|
632
608
|
|
|
633
609
|
out = {
|
|
634
|
-
p:
|
|
635
|
-
for p in sorted(allpaths)
|
|
636
|
-
if pattern.match(
|
|
610
|
+
p: info
|
|
611
|
+
for p, info in sorted(allpaths.items())
|
|
612
|
+
if pattern.match(
|
|
613
|
+
(
|
|
614
|
+
p + "/"
|
|
615
|
+
if append_slash_to_dirname and info["type"] == "directory"
|
|
616
|
+
else p
|
|
617
|
+
)
|
|
618
|
+
)
|
|
637
619
|
}
|
|
638
620
|
|
|
639
|
-
# Return directories only when the glob end by a slash
|
|
640
|
-
# This is needed for posix glob compliance
|
|
641
|
-
if ends:
|
|
642
|
-
out = {k: v for k, v in out.items() if v["type"] == "directory"}
|
|
643
|
-
|
|
644
621
|
if detail:
|
|
645
622
|
return out
|
|
646
623
|
else:
|
|
@@ -828,6 +805,16 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
828
805
|
def cat_ranges(
|
|
829
806
|
self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
|
|
830
807
|
):
|
|
808
|
+
"""Get the contents of byte ranges from one or more files
|
|
809
|
+
|
|
810
|
+
Parameters
|
|
811
|
+
----------
|
|
812
|
+
paths: list
|
|
813
|
+
A list of filepaths on this filesystem
|
|
814
|
+
starts, ends: int or list
|
|
815
|
+
Bytes limits of the read. If using a single int, the same value will be
|
|
816
|
+
used to read all the specified files.
|
|
817
|
+
"""
|
|
831
818
|
if max_gap is not None:
|
|
832
819
|
raise NotImplementedError
|
|
833
820
|
if not isinstance(paths, list):
|
|
@@ -835,7 +822,7 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
835
822
|
if not isinstance(starts, list):
|
|
836
823
|
starts = [starts] * len(paths)
|
|
837
824
|
if not isinstance(ends, list):
|
|
838
|
-
ends = [
|
|
825
|
+
ends = [ends] * len(paths)
|
|
839
826
|
if len(starts) != len(paths) or len(ends) != len(paths):
|
|
840
827
|
raise ValueError
|
|
841
828
|
out = []
|
|
@@ -889,9 +876,7 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
889
876
|
else:
|
|
890
877
|
return self.cat_file(paths[0], **kwargs)
|
|
891
878
|
|
|
892
|
-
def get_file(
|
|
893
|
-
self, rpath, lpath, callback=_DEFAULT_CALLBACK, outfile=None, **kwargs
|
|
894
|
-
):
|
|
879
|
+
def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, outfile=None, **kwargs):
|
|
895
880
|
"""Copy single remote file to local"""
|
|
896
881
|
from .implementations.local import LocalFileSystem
|
|
897
882
|
|
|
@@ -901,7 +886,8 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
901
886
|
os.makedirs(lpath, exist_ok=True)
|
|
902
887
|
return None
|
|
903
888
|
|
|
904
|
-
LocalFileSystem(auto_mkdir=True)
|
|
889
|
+
fs = LocalFileSystem(auto_mkdir=True)
|
|
890
|
+
fs.makedirs(fs._parent(lpath), exist_ok=True)
|
|
905
891
|
|
|
906
892
|
with self.open(rpath, "rb", **kwargs) as f1:
|
|
907
893
|
if outfile is None:
|
|
@@ -925,7 +911,7 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
925
911
|
rpath,
|
|
926
912
|
lpath,
|
|
927
913
|
recursive=False,
|
|
928
|
-
callback=_DEFAULT_CALLBACK,
|
|
914
|
+
callback=DEFAULT_CALLBACK,
|
|
929
915
|
maxdepth=None,
|
|
930
916
|
**kwargs,
|
|
931
917
|
):
|
|
@@ -979,10 +965,10 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
979
965
|
|
|
980
966
|
callback.set_size(len(lpaths))
|
|
981
967
|
for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
|
|
982
|
-
callback.
|
|
983
|
-
|
|
968
|
+
with callback.branched(rpath, lpath) as child:
|
|
969
|
+
self.get_file(rpath, lpath, callback=child, **kwargs)
|
|
984
970
|
|
|
985
|
-
def put_file(self, lpath, rpath, callback=_DEFAULT_CALLBACK, **kwargs):
|
|
971
|
+
def put_file(self, lpath, rpath, callback=DEFAULT_CALLBACK, **kwargs):
|
|
986
972
|
"""Copy single file to remote"""
|
|
987
973
|
if os.path.isdir(lpath):
|
|
988
974
|
self.makedirs(rpath, exist_ok=True)
|
|
@@ -1007,7 +993,7 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
1007
993
|
lpath,
|
|
1008
994
|
rpath,
|
|
1009
995
|
recursive=False,
|
|
1010
|
-
callback=_DEFAULT_CALLBACK,
|
|
996
|
+
callback=DEFAULT_CALLBACK,
|
|
1011
997
|
maxdepth=None,
|
|
1012
998
|
**kwargs,
|
|
1013
999
|
):
|
|
@@ -1065,8 +1051,8 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
1065
1051
|
|
|
1066
1052
|
callback.set_size(len(rpaths))
|
|
1067
1053
|
for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
|
|
1068
|
-
callback.
|
|
1069
|
-
|
|
1054
|
+
with callback.branched(lpath, rpath) as child:
|
|
1055
|
+
self.put_file(lpath, rpath, callback=child, **kwargs)
|
|
1070
1056
|
|
|
1071
1057
|
def head(self, path, size=1024):
|
|
1072
1058
|
"""Get the first ``size`` bytes from file"""
|
|
@@ -1146,7 +1132,7 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
1146
1132
|
if maxdepth is not None and maxdepth < 1:
|
|
1147
1133
|
raise ValueError("maxdepth must be at least 1")
|
|
1148
1134
|
|
|
1149
|
-
if isinstance(path, str):
|
|
1135
|
+
if isinstance(path, (str, os.PathLike)):
|
|
1150
1136
|
out = self.expand_path([path], recursive, maxdepth)
|
|
1151
1137
|
else:
|
|
1152
1138
|
out = set()
|
|
@@ -1412,7 +1398,9 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
1412
1398
|
)
|
|
1413
1399
|
return json.dumps(
|
|
1414
1400
|
dict(
|
|
1415
|
-
|
|
1401
|
+
cls=cls,
|
|
1402
|
+
protocol=proto,
|
|
1403
|
+
args=self.storage_args,
|
|
1416
1404
|
**self.storage_options,
|
|
1417
1405
|
)
|
|
1418
1406
|
)
|
|
@@ -1703,6 +1691,8 @@ class AbstractBufferedFile(io.IOBase):
|
|
|
1703
1691
|
|
|
1704
1692
|
def __eq__(self, other):
|
|
1705
1693
|
"""Files are equal if they have the same checksum, only in read mode"""
|
|
1694
|
+
if self is other:
|
|
1695
|
+
return True
|
|
1706
1696
|
return self.mode == "rb" and other.mode == "rb" and hash(self) == hash(other)
|
|
1707
1697
|
|
|
1708
1698
|
def commit(self):
|
fsspec/tests/abstract/common.py
CHANGED
|
@@ -107,9 +107,9 @@ GLOB_EDGE_CASES_TESTS = {
|
|
|
107
107
|
"subdir1/subfile2",
|
|
108
108
|
],
|
|
109
109
|
),
|
|
110
|
-
("
|
|
110
|
+
("**/*1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
|
|
111
111
|
(
|
|
112
|
-
"
|
|
112
|
+
"**/*1",
|
|
113
113
|
True,
|
|
114
114
|
None,
|
|
115
115
|
[
|
|
@@ -120,14 +120,14 @@ GLOB_EDGE_CASES_TESTS = {
|
|
|
120
120
|
"subdir1/nesteddir/nestedfile",
|
|
121
121
|
],
|
|
122
122
|
),
|
|
123
|
-
("
|
|
123
|
+
("**/*1", True, 1, ["file1"]),
|
|
124
124
|
(
|
|
125
|
-
"
|
|
125
|
+
"**/*1",
|
|
126
126
|
True,
|
|
127
127
|
2,
|
|
128
128
|
["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"],
|
|
129
129
|
),
|
|
130
|
-
("
|
|
130
|
+
("**/*1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
|
|
131
131
|
("**/subdir0", False, None, []),
|
|
132
132
|
("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
|
|
133
133
|
("**/subdir0/nested*", False, 2, []),
|
fsspec/tests/abstract/copy.py
CHANGED
|
@@ -128,7 +128,9 @@ class AbstractCopyTests:
|
|
|
128
128
|
|
|
129
129
|
# Without recursive does nothing
|
|
130
130
|
fs.cp(s, t)
|
|
131
|
-
assert fs.ls(target) == (
|
|
131
|
+
assert fs.ls(target, detail=False) == (
|
|
132
|
+
[] if supports_empty_directories else [dummy]
|
|
133
|
+
)
|
|
132
134
|
|
|
133
135
|
# With recursive
|
|
134
136
|
fs.cp(s, t, recursive=True)
|
|
@@ -155,7 +157,9 @@ class AbstractCopyTests:
|
|
|
155
157
|
assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
|
|
156
158
|
|
|
157
159
|
fs.rm(fs_join(target, "subdir"), recursive=True)
|
|
158
|
-
assert fs.ls(target) == (
|
|
160
|
+
assert fs.ls(target, detail=False) == (
|
|
161
|
+
[] if supports_empty_directories else [dummy]
|
|
162
|
+
)
|
|
159
163
|
|
|
160
164
|
# Limit recursive by maxdepth
|
|
161
165
|
fs.cp(s, t, recursive=True, maxdepth=1)
|
|
@@ -179,7 +183,9 @@ class AbstractCopyTests:
|
|
|
179
183
|
assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
|
|
180
184
|
|
|
181
185
|
fs.rm(fs_join(target, "subdir"), recursive=True)
|
|
182
|
-
assert fs.ls(target) == (
|
|
186
|
+
assert fs.ls(target, detail=False) == (
|
|
187
|
+
[] if supports_empty_directories else [dummy]
|
|
188
|
+
)
|
|
183
189
|
|
|
184
190
|
def test_copy_directory_to_new_directory(
|
|
185
191
|
self,
|
|
@@ -271,7 +277,9 @@ class AbstractCopyTests:
|
|
|
271
277
|
],
|
|
272
278
|
recursive=True,
|
|
273
279
|
)
|
|
274
|
-
assert fs.ls(target) == (
|
|
280
|
+
assert fs.ls(target, detail=False) == (
|
|
281
|
+
[] if supports_empty_directories else [dummy]
|
|
282
|
+
)
|
|
275
283
|
|
|
276
284
|
# With recursive
|
|
277
285
|
for glob, recursive in zip(["*", "**"], [True, False]):
|
|
@@ -290,7 +298,9 @@ class AbstractCopyTests:
|
|
|
290
298
|
],
|
|
291
299
|
recursive=True,
|
|
292
300
|
)
|
|
293
|
-
assert fs.ls(target) == (
|
|
301
|
+
assert fs.ls(target, detail=False) == (
|
|
302
|
+
[] if supports_empty_directories else [dummy]
|
|
303
|
+
)
|
|
294
304
|
|
|
295
305
|
# Limit recursive by maxdepth
|
|
296
306
|
fs.cp(
|
|
@@ -308,7 +318,9 @@ class AbstractCopyTests:
|
|
|
308
318
|
],
|
|
309
319
|
recursive=True,
|
|
310
320
|
)
|
|
311
|
-
assert fs.ls(target) == (
|
|
321
|
+
assert fs.ls(target, detail=False) == (
|
|
322
|
+
[] if supports_empty_directories else [dummy]
|
|
323
|
+
)
|
|
312
324
|
|
|
313
325
|
def test_copy_glob_to_new_directory(
|
|
314
326
|
self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
|
|
@@ -451,7 +463,9 @@ class AbstractCopyTests:
|
|
|
451
463
|
],
|
|
452
464
|
recursive=True,
|
|
453
465
|
)
|
|
454
|
-
assert fs.ls(target) == (
|
|
466
|
+
assert fs.ls(target, detail=False) == (
|
|
467
|
+
[] if supports_empty_directories else [dummy]
|
|
468
|
+
)
|
|
455
469
|
|
|
456
470
|
def test_copy_list_of_files_to_new_directory(
|
|
457
471
|
self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
|
fsspec/tests/abstract/put.py
CHANGED
|
@@ -131,7 +131,9 @@ class AbstractPutTests:
|
|
|
131
131
|
|
|
132
132
|
# Without recursive does nothing
|
|
133
133
|
fs.put(s, t)
|
|
134
|
-
assert fs.ls(target) == (
|
|
134
|
+
assert fs.ls(target, detail=False) == (
|
|
135
|
+
[] if supports_empty_directories else [dummy]
|
|
136
|
+
)
|
|
135
137
|
|
|
136
138
|
# With recursive
|
|
137
139
|
fs.put(s, t, recursive=True)
|
|
@@ -158,7 +160,9 @@ class AbstractPutTests:
|
|
|
158
160
|
assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
|
|
159
161
|
|
|
160
162
|
fs.rm(fs_join(target, "subdir"), recursive=True)
|
|
161
|
-
assert fs.ls(target) == (
|
|
163
|
+
assert fs.ls(target, detail=False) == (
|
|
164
|
+
[] if supports_empty_directories else [dummy]
|
|
165
|
+
)
|
|
162
166
|
|
|
163
167
|
# Limit recursive by maxdepth
|
|
164
168
|
fs.put(s, t, recursive=True, maxdepth=1)
|
|
@@ -182,7 +186,9 @@ class AbstractPutTests:
|
|
|
182
186
|
assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
|
|
183
187
|
|
|
184
188
|
fs.rm(fs_join(target, "subdir"), recursive=True)
|
|
185
|
-
assert fs.ls(target) == (
|
|
189
|
+
assert fs.ls(target, detail=False) == (
|
|
190
|
+
[] if supports_empty_directories else [dummy]
|
|
191
|
+
)
|
|
186
192
|
|
|
187
193
|
def test_put_directory_to_new_directory(
|
|
188
194
|
self,
|
|
@@ -275,7 +281,9 @@ class AbstractPutTests:
|
|
|
275
281
|
],
|
|
276
282
|
recursive=True,
|
|
277
283
|
)
|
|
278
|
-
assert fs.ls(target) == (
|
|
284
|
+
assert fs.ls(target, detail=False) == (
|
|
285
|
+
[] if supports_empty_directories else [dummy]
|
|
286
|
+
)
|
|
279
287
|
|
|
280
288
|
# With recursive
|
|
281
289
|
for glob, recursive in zip(["*", "**"], [True, False]):
|
|
@@ -294,7 +302,9 @@ class AbstractPutTests:
|
|
|
294
302
|
],
|
|
295
303
|
recursive=True,
|
|
296
304
|
)
|
|
297
|
-
assert fs.ls(target) == (
|
|
305
|
+
assert fs.ls(target, detail=False) == (
|
|
306
|
+
[] if supports_empty_directories else [dummy]
|
|
307
|
+
)
|
|
298
308
|
|
|
299
309
|
# Limit recursive by maxdepth
|
|
300
310
|
fs.put(
|
|
@@ -315,7 +325,9 @@ class AbstractPutTests:
|
|
|
315
325
|
],
|
|
316
326
|
recursive=True,
|
|
317
327
|
)
|
|
318
|
-
assert fs.ls(target) == (
|
|
328
|
+
assert fs.ls(target, detail=False) == (
|
|
329
|
+
[] if supports_empty_directories else [dummy]
|
|
330
|
+
)
|
|
319
331
|
|
|
320
332
|
def test_put_glob_to_new_directory(
|
|
321
333
|
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
|
@@ -463,7 +475,9 @@ class AbstractPutTests:
|
|
|
463
475
|
],
|
|
464
476
|
recursive=True,
|
|
465
477
|
)
|
|
466
|
-
assert fs.ls(target) == (
|
|
478
|
+
assert fs.ls(target, detail=False) == (
|
|
479
|
+
[] if supports_empty_directories else [dummy]
|
|
480
|
+
)
|
|
467
481
|
|
|
468
482
|
def test_put_list_of_files_to_new_directory(
|
|
469
483
|
self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
|
fsspec/transaction.py
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
from collections import deque
|
|
2
|
+
|
|
3
|
+
|
|
1
4
|
class Transaction:
|
|
2
5
|
"""Filesystem transaction write context
|
|
3
6
|
|
|
@@ -13,10 +16,11 @@ class Transaction:
|
|
|
13
16
|
fs: FileSystem instance
|
|
14
17
|
"""
|
|
15
18
|
self.fs = fs
|
|
16
|
-
self.files = []
|
|
19
|
+
self.files = deque()
|
|
17
20
|
|
|
18
21
|
def __enter__(self):
|
|
19
22
|
self.start()
|
|
23
|
+
return self
|
|
20
24
|
|
|
21
25
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
22
26
|
"""End transaction and commit, if exit is not due to exception"""
|
|
@@ -27,17 +31,17 @@ class Transaction:
|
|
|
27
31
|
|
|
28
32
|
def start(self):
|
|
29
33
|
"""Start a transaction on this FileSystem"""
|
|
30
|
-
self.files = []
|
|
34
|
+
self.files = deque() # clean up after previous failed completions
|
|
31
35
|
self.fs._intrans = True
|
|
32
36
|
|
|
33
37
|
def complete(self, commit=True):
|
|
34
38
|
"""Finish transaction: commit or discard all deferred files"""
|
|
35
|
-
|
|
39
|
+
while self.files:
|
|
40
|
+
f = self.files.popleft()
|
|
36
41
|
if commit:
|
|
37
42
|
f.commit()
|
|
38
43
|
else:
|
|
39
44
|
f.discard()
|
|
40
|
-
self.files = []
|
|
41
45
|
self.fs._intrans = False
|
|
42
46
|
|
|
43
47
|
|
fsspec/utils.py
CHANGED
|
@@ -320,7 +320,7 @@ def tokenize(*args: Any, **kwargs: Any) -> str:
|
|
|
320
320
|
h = md5(str(args).encode())
|
|
321
321
|
except ValueError:
|
|
322
322
|
# FIPS systems: https://github.com/fsspec/filesystem_spec/issues/380
|
|
323
|
-
h = md5(str(args).encode(), usedforsecurity=False)
|
|
323
|
+
h = md5(str(args).encode(), usedforsecurity=False)
|
|
324
324
|
return h.hexdigest()
|
|
325
325
|
|
|
326
326
|
|
|
@@ -436,6 +436,7 @@ def isfilelike(f: Any) -> TypeGuard[IO[bytes]]:
|
|
|
436
436
|
|
|
437
437
|
|
|
438
438
|
def get_protocol(url: str) -> str:
|
|
439
|
+
url = stringify_path(url)
|
|
439
440
|
parts = re.split(r"(\:\:|\://)", url, 1)
|
|
440
441
|
if len(parts) > 1:
|
|
441
442
|
return parts[0]
|
|
@@ -627,3 +628,115 @@ def atomic_write(path: str, mode: str = "wb"):
|
|
|
627
628
|
raise
|
|
628
629
|
else:
|
|
629
630
|
os.replace(fn, path)
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
def _translate(pat, STAR, QUESTION_MARK):
|
|
634
|
+
# Copied from: https://github.com/python/cpython/pull/106703.
|
|
635
|
+
res: list[str] = []
|
|
636
|
+
add = res.append
|
|
637
|
+
i, n = 0, len(pat)
|
|
638
|
+
while i < n:
|
|
639
|
+
c = pat[i]
|
|
640
|
+
i = i + 1
|
|
641
|
+
if c == "*":
|
|
642
|
+
# compress consecutive `*` into one
|
|
643
|
+
if (not res) or res[-1] is not STAR:
|
|
644
|
+
add(STAR)
|
|
645
|
+
elif c == "?":
|
|
646
|
+
add(QUESTION_MARK)
|
|
647
|
+
elif c == "[":
|
|
648
|
+
j = i
|
|
649
|
+
if j < n and pat[j] == "!":
|
|
650
|
+
j = j + 1
|
|
651
|
+
if j < n and pat[j] == "]":
|
|
652
|
+
j = j + 1
|
|
653
|
+
while j < n and pat[j] != "]":
|
|
654
|
+
j = j + 1
|
|
655
|
+
if j >= n:
|
|
656
|
+
add("\\[")
|
|
657
|
+
else:
|
|
658
|
+
stuff = pat[i:j]
|
|
659
|
+
if "-" not in stuff:
|
|
660
|
+
stuff = stuff.replace("\\", r"\\")
|
|
661
|
+
else:
|
|
662
|
+
chunks = []
|
|
663
|
+
k = i + 2 if pat[i] == "!" else i + 1
|
|
664
|
+
while True:
|
|
665
|
+
k = pat.find("-", k, j)
|
|
666
|
+
if k < 0:
|
|
667
|
+
break
|
|
668
|
+
chunks.append(pat[i:k])
|
|
669
|
+
i = k + 1
|
|
670
|
+
k = k + 3
|
|
671
|
+
chunk = pat[i:j]
|
|
672
|
+
if chunk:
|
|
673
|
+
chunks.append(chunk)
|
|
674
|
+
else:
|
|
675
|
+
chunks[-1] += "-"
|
|
676
|
+
# Remove empty ranges -- invalid in RE.
|
|
677
|
+
for k in range(len(chunks) - 1, 0, -1):
|
|
678
|
+
if chunks[k - 1][-1] > chunks[k][0]:
|
|
679
|
+
chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:]
|
|
680
|
+
del chunks[k]
|
|
681
|
+
# Escape backslashes and hyphens for set difference (--).
|
|
682
|
+
# Hyphens that create ranges shouldn't be escaped.
|
|
683
|
+
stuff = "-".join(
|
|
684
|
+
s.replace("\\", r"\\").replace("-", r"\-") for s in chunks
|
|
685
|
+
)
|
|
686
|
+
# Escape set operations (&&, ~~ and ||).
|
|
687
|
+
stuff = re.sub(r"([&~|])", r"\\\1", stuff)
|
|
688
|
+
i = j + 1
|
|
689
|
+
if not stuff:
|
|
690
|
+
# Empty range: never match.
|
|
691
|
+
add("(?!)")
|
|
692
|
+
elif stuff == "!":
|
|
693
|
+
# Negated empty range: match any character.
|
|
694
|
+
add(".")
|
|
695
|
+
else:
|
|
696
|
+
if stuff[0] == "!":
|
|
697
|
+
stuff = "^" + stuff[1:]
|
|
698
|
+
elif stuff[0] in ("^", "["):
|
|
699
|
+
stuff = "\\" + stuff
|
|
700
|
+
add(f"[{stuff}]")
|
|
701
|
+
else:
|
|
702
|
+
add(re.escape(c))
|
|
703
|
+
assert i == n
|
|
704
|
+
return res
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
def glob_translate(pat):
|
|
708
|
+
# Copied from: https://github.com/python/cpython/pull/106703.
|
|
709
|
+
# The keyword parameters' values are fixed to:
|
|
710
|
+
# recursive=True, include_hidden=True, seps=None
|
|
711
|
+
"""Translate a pathname with shell wildcards to a regular expression."""
|
|
712
|
+
if os.path.altsep:
|
|
713
|
+
seps = os.path.sep + os.path.altsep
|
|
714
|
+
else:
|
|
715
|
+
seps = os.path.sep
|
|
716
|
+
escaped_seps = "".join(map(re.escape, seps))
|
|
717
|
+
any_sep = f"[{escaped_seps}]" if len(seps) > 1 else escaped_seps
|
|
718
|
+
not_sep = f"[^{escaped_seps}]"
|
|
719
|
+
one_last_segment = f"{not_sep}+"
|
|
720
|
+
one_segment = f"{one_last_segment}{any_sep}"
|
|
721
|
+
any_segments = f"(?:.+{any_sep})?"
|
|
722
|
+
any_last_segments = ".*"
|
|
723
|
+
results = []
|
|
724
|
+
parts = re.split(any_sep, pat)
|
|
725
|
+
last_part_idx = len(parts) - 1
|
|
726
|
+
for idx, part in enumerate(parts):
|
|
727
|
+
if part == "*":
|
|
728
|
+
results.append(one_segment if idx < last_part_idx else one_last_segment)
|
|
729
|
+
continue
|
|
730
|
+
if part == "**":
|
|
731
|
+
results.append(any_segments if idx < last_part_idx else any_last_segments)
|
|
732
|
+
continue
|
|
733
|
+
elif "**" in part:
|
|
734
|
+
raise ValueError(
|
|
735
|
+
"Invalid pattern: '**' can only be an entire path component"
|
|
736
|
+
)
|
|
737
|
+
if part:
|
|
738
|
+
results.extend(_translate(part, f"{not_sep}*", not_sep))
|
|
739
|
+
if idx < last_part_idx:
|
|
740
|
+
results.append(any_sep)
|
|
741
|
+
res = "".join(results)
|
|
742
|
+
return rf"(?s:{res})\Z"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: fsspec
|
|
3
|
-
Version: 2023.10.0
|
|
3
|
+
Version: 2024.2.0
|
|
4
4
|
Summary: File-system specification
|
|
5
5
|
Home-page: https://github.com/fsspec/filesystem_spec
|
|
6
6
|
Maintainer: Martin Durant
|
|
@@ -71,7 +71,6 @@ Requires-Dist: panel ; extra == 'gui'
|
|
|
71
71
|
Provides-Extra: hdfs
|
|
72
72
|
Requires-Dist: pyarrow >=1 ; extra == 'hdfs'
|
|
73
73
|
Provides-Extra: http
|
|
74
|
-
Requires-Dist: requests ; extra == 'http'
|
|
75
74
|
Requires-Dist: aiohttp !=4.0.0a0,!=4.0.0a1 ; extra == 'http'
|
|
76
75
|
Provides-Extra: libarchive
|
|
77
76
|
Requires-Dist: libarchive-c ; extra == 'libarchive'
|