fsspec 2024.9.0__py3-none-any.whl → 2024.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/_version.py +2 -2
- fsspec/asyn.py +9 -7
- fsspec/caching.py +34 -19
- fsspec/core.py +20 -15
- fsspec/implementations/asyn_wrapper.py +98 -0
- fsspec/implementations/cached.py +1 -1
- fsspec/implementations/dirfs.py +12 -0
- fsspec/implementations/ftp.py +1 -1
- fsspec/implementations/git.py +27 -39
- fsspec/implementations/http.py +14 -30
- fsspec/implementations/local.py +6 -1
- fsspec/implementations/memory.py +15 -6
- fsspec/implementations/reference.py +58 -15
- fsspec/implementations/webhdfs.py +2 -1
- fsspec/implementations/zip.py +2 -1
- fsspec/mapping.py +1 -1
- fsspec/parquet.py +1 -1
- fsspec/registry.py +4 -0
- fsspec/spec.py +209 -35
- fsspec/tests/abstract/__init__.py +3 -1
- fsspec/tests/abstract/open.py +11 -0
- fsspec/tests/abstract/pipe.py +11 -0
- fsspec/utils.py +4 -2
- {fsspec-2024.9.0.dist-info → fsspec-2024.12.0.dist-info}/METADATA +3 -2
- {fsspec-2024.9.0.dist-info → fsspec-2024.12.0.dist-info}/RECORD +27 -24
- {fsspec-2024.9.0.dist-info → fsspec-2024.12.0.dist-info}/WHEEL +1 -1
- {fsspec-2024.9.0.dist-info → fsspec-2024.12.0.dist-info}/licenses/LICENSE +0 -0
fsspec/implementations/reference.py
CHANGED
```diff
@@ -5,9 +5,9 @@ import itertools
 import logging
 import math
 import os
-from itertools import chain
 from functools import lru_cache
-from typing import TYPE_CHECKING
+from itertools import chain
+from typing import TYPE_CHECKING, Literal
 
 import fsspec.core
 
```
```diff
@@ -20,6 +20,7 @@ except ImportError:
 from fsspec.asyn import AsyncFileSystem
 from fsspec.callbacks import DEFAULT_CALLBACK
 from fsspec.core import filesystem, open, split_protocol
+from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper
 from fsspec.utils import isfilelike, merge_offset_ranges, other_paths
 
 logger = logging.getLogger("fsspec.reference")
```
```diff
@@ -41,7 +42,7 @@ def _first(d):
 
 def _prot_in_references(path, references):
     ref = references.get(path)
-    if isinstance(ref, (list, tuple)):
+    if isinstance(ref, (list, tuple)) and isinstance(ref[0], str):
         return split_protocol(ref[0])[0] if ref[0] else ref[0]
 
 
```
```diff
@@ -104,7 +105,13 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
         return pd
 
     def __init__(
-        self, root, fs=None, out_root=None, cache_size=128, categorical_threshold=10
+        self,
+        root,
+        fs=None,
+        out_root=None,
+        cache_size=128,
+        categorical_threshold=10,
+        engine: Literal["fastparquet", "pyarrow"] = "fastparquet",
     ):
         """
 
```
```diff
@@ -126,16 +133,25 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
             Encode urls as pandas.Categorical to reduce memory footprint if the ratio
             of the number of unique urls to total number of refs for each variable
             is greater than or equal to this number. (default 10)
+        engine: Literal["fastparquet","pyarrow"]
+            Engine choice for reading parquet files. (default is "fastparquet")
         """
+
         self.root = root
         self.chunk_sizes = {}
         self.out_root = out_root or self.root
         self.cat_thresh = categorical_threshold
+        self.engine = engine
         self.cache_size = cache_size
         self.url = self.root + "/{field}/refs.{record}.parq"
         # TODO: derive fs from `root`
         self.fs = fsspec.filesystem("file") if fs is None else fs
 
+        from importlib.util import find_spec
+
+        if self.engine == "pyarrow" and find_spec("pyarrow") is None:
+            raise ImportError("engine choice `pyarrow` is not installed.")
+
     def __getattr__(self, item):
         if item in ("_items", "record_size", "zmetadata"):
             self.setup()
```
```diff
@@ -158,8 +174,11 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
             """cached parquet file loader"""
             path = self.url.format(field=field, record=record)
             data = io.BytesIO(self.fs.cat_file(path))
-            df = self.pd.read_parquet(data, engine="fastparquet")
-            refs = {c: df[c].to_numpy() for c in df.columns}
+            try:
+                df = self.pd.read_parquet(data, engine=self.engine)
+                refs = {c: df[c].to_numpy() for c in df.columns}
+            except OSError:
+                refs = None
             return refs
 
         self.open_refs = open_refs
```
```diff
@@ -413,7 +432,7 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
         if len(partition) < self.record_size:
             try:
                 original = self.open_refs(field, record)
-            except IOError:
+            except OSError:
                 pass
 
             if original:
```
```diff
@@ -463,18 +482,28 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
 
         fn = f"{base_url or self.out_root}/{field}/refs.{record}.parq"
         self.fs.mkdirs(f"{base_url or self.out_root}/{field}", exist_ok=True)
+
+        if self.engine == "pyarrow":
+            df_backend_kwargs = {"write_statistics": False}
+        elif self.engine == "fastparquet":
+            df_backend_kwargs = {
+                "stats": False,
+                "object_encoding": object_encoding,
+                "has_nulls": has_nulls,
+            }
+        else:
+            raise NotImplementedError(f"{self.engine} not supported")
+
         df.to_parquet(
             fn,
-            engine="fastparquet",
+            engine=self.engine,
             storage_options=storage_options
             or getattr(self.fs, "storage_options", None),
             compression="zstd",
             index=False,
-            stats=False,
-            object_encoding=object_encoding,
-            has_nulls=has_nulls,
-            # **kwargs,
+            **df_backend_kwargs,
         )
+
         partition.clear()
         self._items.pop((field, record))
```
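The hunks above thread a new `engine` option through `LazyReferenceMapper`, covering both the cached reader (`open_refs`) and the record writer. A minimal usage sketch (the root path is a placeholder):

```python
from fsspec.implementations.reference import LazyReferenceMapper

# New in 2024.12.0: pick the parquet backend; "fastparquet" remains the
# default, and requesting "pyarrow" without it installed raises ImportError.
refs = LazyReferenceMapper(
    "/tmp/refs",  # placeholder root containing {field}/refs.{record}.parq
    engine="pyarrow",
)
```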
```diff
@@ -486,6 +515,7 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
         base_url: str
             Location of the output
         """
+
         # write what we have so far and clear sub chunks
         for thing in list(self._items):
             if isinstance(thing, tuple):
```
```diff
@@ -728,6 +758,10 @@ class ReferenceFileSystem(AsyncFileSystem):
             self.fss[remote_protocol] = fs
 
         self.fss[None] = fs or filesystem("file")  # default one
+        # Wrap any non-async filesystems to ensure async methods are available below
+        for k, f in self.fss.items():
+            if not f.async_impl:
+                self.fss[k] = AsyncFileSystemWrapper(f)
 
     def _cat_common(self, path, start=None, end=None):
         path = self._strip_protocol(path)
```
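This wrapping step relies on fsspec/implementations/asyn_wrapper.py, a new module in this release (+98 lines). A rough sketch of the wrapper on its own, assuming it exposes coroutine versions of the sync methods (as the comment above implies):

```python
import asyncio

import fsspec
from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper


async def main():
    sync_fs = fsspec.filesystem("memory")
    sync_fs.pipe_file("/demo.bin", b"hello")

    # Wrap a synchronous filesystem; async_impl is True on the wrapper,
    # so callers like ReferenceFileSystem can await _cat_file() uniformly.
    afs = AsyncFileSystemWrapper(sync_fs)
    assert await afs._cat_file("/demo.bin") == b"hello"


asyncio.run(main())
```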
```diff
@@ -777,7 +811,9 @@ class ReferenceFileSystem(AsyncFileSystem):
             return part_or_url[start:end]
         protocol, _ = split_protocol(part_or_url)
         try:
-            await self.fss[protocol]._cat_file(part_or_url, start=start0, end=end0)
+            return await self.fss[protocol]._cat_file(
+                part_or_url, start=start0, end=end0
+            )
         except Exception as e:
             raise ReferenceNotReachable(path, part_or_url) from e
 
```
```diff
@@ -845,6 +881,9 @@ class ReferenceFileSystem(AsyncFileSystem):
             # found and on_error is "raise"
             try:
                 u, s, e = self._cat_common(p)
+                if not isinstance(u, (bytes, str)):
+                    # nan/None from parquet
+                    continue
             except FileNotFoundError as err:
                 if on_error == "raise":
                     raise
```
```diff
@@ -1147,13 +1186,17 @@ class ReferenceFileSystem(AsyncFileSystem):
         )  # ignores FileNotFound, just as well for directories
         self.dircache.clear()  # this is a bit heavy handed
 
-    async def _pipe_file(self, path, data):
+    async def _pipe_file(self, path, data, mode="overwrite", **kwargs):
+        if mode == "create" and self.exists(path):
+            raise FileExistsError
         # can be str or bytes
         self.references[path] = data
         self.dircache.clear()  # this is a bit heavy handed
 
-    async def _put_file(self, lpath, rpath, **kwargs):
+    async def _put_file(self, lpath, rpath, mode="overwrite", **kwargs):
         # puts binary
+        if mode == "create" and self.exists(rpath):
+            raise FileExistsError
         with open(lpath, "rb") as f:
             self.references[rpath] = f.read()
         self.dircache.clear()  # this is a bit heavy handed
```
fsspec/implementations/webhdfs.py
CHANGED
```diff
@@ -166,7 +166,8 @@ class WebHDFS(AbstractFileSystem):
             self.session.auth = HTTPBasicAuth(self.user, self.password)
 
     def _call(self, op, method="get", path=None, data=None, redirect=True, **kwargs):
-        url = self._apply_proxy(self.url + quote(path or "", safe="/="))
+        path = self._strip_protocol(path) if path is not None else ""
+        url = self._apply_proxy(self.url + quote(path, safe="/="))
         args = kwargs.copy()
         args.update(self.pars)
         args["op"] = op.upper()
```
fsspec/implementations/zip.py
CHANGED
```diff
@@ -1,3 +1,4 @@
+import os
 import zipfile
 
 import fsspec
```
```diff
@@ -48,7 +49,7 @@ class ZipFileSystem(AbstractArchiveFileSystem):
         if mode not in set("rwa"):
             raise ValueError(f"mode '{mode}' no understood")
         self.mode = mode
-        if isinstance(fo, str):
+        if isinstance(fo, (str, os.PathLike)):
             if mode == "a":
                 m = "r+b"
             else:
```
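With this change a pathlib.Path (or any os.PathLike) is accepted directly as `fo`, where previously only str worked. For example, assuming archive.zip exists:

```python
from pathlib import Path

from fsspec.implementations.zip import ZipFileSystem

# 2024.9.0 required str(fo); 2024.12.0 accepts os.PathLike directly.
fs = ZipFileSystem(Path("archive.zip"))  # read-only by default (mode="r")
print(fs.ls("/"))
```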
fsspec/mapping.py
CHANGED
```diff
@@ -112,7 +112,7 @@ class FSMap(MutableMapping):
             for k, v in out.items()
         }
         return {
-            key: out[k2]
+            key: out[k2] if on_error == "raise" else out.get(k2, KeyError(k2))
             for key, k2 in zip(keys, keys2)
             if on_error == "return" or not isinstance(out[k2], BaseException)
         }
```
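The effect of this one-liner is that `getitems(..., on_error="return")` now reports missing keys as KeyError values instead of raising a bare KeyError on lookup. A quick illustration:

```python
import fsspec

m = fsspec.get_mapper("memory://demo")
m["present"] = b"1"

out = m.getitems(["present", "absent"], on_error="return")
print(out)  # {'present': b'1', 'absent': KeyError(...)}
```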
fsspec/parquet.py
CHANGED
fsspec/registry.py
CHANGED
```diff
@@ -202,6 +202,10 @@ known_implementations = {
         "err": 'SFTPFileSystem requires "paramiko" to be installed',
     },
     "tar": {"class": "fsspec.implementations.tar.TarFileSystem"},
+    "tosfs": {
+        "class": "tosfs.TosFileSystem",
+        "err": "Install tosfs to access ByteDance volcano engine Tinder Object Storage",
+    },
     "wandb": {"class": "wandbfs.WandbFS", "err": "Install wandbfs to access wandb"},
     "webdav": {
         "class": "webdav4.fsspec.WebdavFileSystem",
```
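The registry entry only records a dotted import path; the external tosfs package is imported lazily on first use, with the `err` message surfaced if it is missing:

```python
import fsspec
from fsspec.registry import known_implementations

print(known_implementations["tosfs"]["class"])  # 'tosfs.TosFileSystem'
# fsspec.filesystem("tosfs", ...) now resolves this class, raising an
# ImportError with the hint above when tosfs is not installed.
```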
fsspec/spec.py
CHANGED
```diff
@@ -10,7 +10,7 @@ import weakref
 from errno import ESPIPE
 from glob import has_magic
 from hashlib import sha256
-from typing import Any, ClassVar, Dict, Tuple
+from typing import Any, ClassVar
 
 from .callbacks import DEFAULT_CALLBACK
 from .config import apply_config, conf
```
```diff
@@ -117,8 +117,8 @@ class AbstractFileSystem(metaclass=_Cached):
     _extra_tokenize_attributes = ()
 
     # Set by _Cached metaclass
-    storage_args: Tuple[Any, ...]
-    storage_options: Dict[str, Any]
+    storage_args: tuple[Any, ...]
+    storage_options: dict[str, Any]
 
     def __init__(self, *args, **storage_options):
         """Create and configure file-system instance
```
```diff
@@ -408,7 +408,7 @@ class AbstractFileSystem(metaclass=_Cached):
         topdown: bool (True)
             Whether to walk the directory tree from the top downwards or from
             the bottom upwards.
-        on_error: "omit", "raise", a collable
+        on_error: "omit", "raise", a callable
             if omit (default), path with exception will simply be empty;
             If raise, an underlying exception will be raised;
             if callable, it will be called with a single OSError instance as argument
```
```diff
@@ -428,11 +428,9 @@ class AbstractFileSystem(metaclass=_Cached):
         except (FileNotFoundError, OSError) as e:
             if on_error == "raise":
                 raise
-            elif callable(on_error):
+            if callable(on_error):
                 on_error(e)
-            if detail:
-                return path, {}, {}
-            return path, [], []
+            return
 
         for info in listing:
             # each info name must be at least [path]/part , but here
```
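walk() now bails out with a bare `return` after an unlistable directory rather than producing a bogus `(path, {}, {})` entry. A small illustration:

```python
import fsspec

fs = fsspec.filesystem("memory")
seen_errors = []

# The unreachable root is simply skipped; the callback still fires.
for root, dirs, files in fs.walk("/does-not-exist", on_error=seen_errors.append):
    print(root, dirs, files)  # never reached

assert isinstance(seen_errors[0], FileNotFoundError)
```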
```diff
@@ -617,11 +615,9 @@ class AbstractFileSystem(metaclass=_Cached):
             p: info
             for p, info in sorted(allpaths.items())
             if pattern.match(
-                (
-                    p + "/"
-                    if append_slash_to_dirname and info["type"] == "directory"
-                    else p
-                )
+                p + "/"
+                if append_slash_to_dirname and info["type"] == "directory"
+                else p
             )
         }
 
```
```diff
@@ -650,7 +646,7 @@ class AbstractFileSystem(metaclass=_Cached):
         Returns a single dictionary, with exactly the same information as ``ls``
         would with ``detail=True``.
 
-        The default implementation should calls ls and could be overridden by a
+        The default implementation calls ls and could be overridden by a
         shortcut. kwargs are passed on to ```ls()``.
 
         Some file systems might not be able to measure the file's size, in
```
```diff
@@ -782,8 +778,12 @@ class AbstractFileSystem(metaclass=_Cached):
             return f.read(end - f.tell())
         return f.read()
 
-    def pipe_file(self, path, value, **kwargs):
+    def pipe_file(self, path, value, mode="overwrite", **kwargs):
         """Set the bytes of given file"""
+        if mode == "create" and self.exists(path):
+            # non-atomic but simple way; or could use "xb" in open(), which is likely
+            # not as well supported
+            raise FileExistsError
         with self.open(path, "wb", **kwargs) as f:
             f.write(value)
 
```
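The new `mode` keyword defaults to "overwrite" (the old behaviour), while "create" refuses to replace an existing file; put_file below gets the same treatment. For example, on the memory filesystem (which the new AbstractPipeTests at the end of this diff exercise):

```python
import fsspec

fs = fsspec.filesystem("memory")

fs.pipe_file("/settings.json", b"{}")  # mode="overwrite" is the default
try:
    fs.pipe_file("/settings.json", b"{}", mode="create")
except FileExistsError:
    print("refused to overwrite existing file")
```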
```diff
@@ -975,8 +975,12 @@ class AbstractFileSystem(metaclass=_Cached):
             with callback.branched(rpath, lpath) as child:
                 self.get_file(rpath, lpath, callback=child, **kwargs)
 
-    def put_file(self, lpath, rpath, callback=DEFAULT_CALLBACK, **kwargs):
+    def put_file(
+        self, lpath, rpath, callback=DEFAULT_CALLBACK, mode="overwrite", **kwargs
+    ):
         """Copy single file to remote"""
+        if mode == "create" and self.exists(rpath):
+            raise FileExistsError
         if os.path.isdir(lpath):
             self.makedirs(rpath, exist_ok=True)
             return None
```
```diff
@@ -1266,6 +1270,9 @@ class AbstractFileSystem(metaclass=_Cached):
             Target file
         mode: str like 'rb', 'w'
             See builtin ``open()``
+            Mode "x" (exclusive write) may be implemented by the backend. Even if
+            it is, whether it is checked up front or on commit, and whether it is
+            atomic is implementation-dependent.
         block_size: int
             Some indication of buffering - this is a value in bytes
         cache_options : dict, optional
```
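This docstring note pairs with the "xb" support added to AbstractBufferedFile further down. On backends that implement exclusive create, the pattern from the new AbstractOpenTests looks like:

```python
import fsspec

fs = fsspec.filesystem("memory")

with fs.open("/fresh.bin", "xb") as f:  # exclusive create
    f.write(b"first write wins")

try:
    fs.open("/fresh.bin", "xb")
except FileExistsError:
    print("second exclusive open fails")
```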
```diff
@@ -1444,7 +1451,7 @@ class AbstractFileSystem(metaclass=_Cached):
 
         return json.loads(blob, cls=FilesystemJSONDecoder)
 
-    def to_dict(self, *, include_password: bool = True) -> Dict[str, Any]:
+    def to_dict(self, *, include_password: bool = True) -> dict[str, Any]:
         """
         JSON-serializable dictionary representation of this filesystem instance.
 
```
```diff
@@ -1485,7 +1492,7 @@ class AbstractFileSystem(metaclass=_Cached):
         )
 
     @staticmethod
-    def from_dict(dct: Dict[str, Any]) -> AbstractFileSystem:
+    def from_dict(dct: dict[str, Any]) -> AbstractFileSystem:
         """
         Recreate a filesystem instance from dictionary representation.
 
```
```diff
@@ -1569,6 +1576,141 @@ class AbstractFileSystem(metaclass=_Cached):
         """Return the modified timestamp of a file as a datetime.datetime"""
         raise NotImplementedError
 
+    def tree(
+        self,
+        path: str = "/",
+        recursion_limit: int = 2,
+        max_display: int = 25,
+        display_size: bool = False,
+        prefix: str = "",
+        is_last: bool = True,
+        first: bool = True,
+        indent_size: int = 4,
+    ) -> str:
+        """
+        Return a tree-like structure of the filesystem starting from the given path as a string.
+
+        Parameters
+        ----------
+        path: Root path to start traversal from
+        recursion_limit: Maximum depth of directory traversal
+        max_display: Maximum number of items to display per directory
+        display_size: Whether to display file sizes
+        prefix: Current line prefix for visual tree structure
+        is_last: Whether current item is last in its level
+        first: Whether this is the first call (displays root path)
+        indent_size: Number of spaces by indent
+
+        Returns
+        -------
+        str: A string representing the tree structure.
+
+        Example
+        -------
+        >>> from fsspec import filesystem
+
+        >>> fs = filesystem('ftp', host='test.rebex.net', user='demo', password='password')
+        >>> tree = fs.tree(display_size=True, recursion_limit=3, indent_size=8, max_display=10)
+        >>> print(tree)
+        """
+
+        def format_bytes(n: int) -> str:
+            """Format bytes as text."""
+            for prefix, k in (
+                ("P", 2**50),
+                ("T", 2**40),
+                ("G", 2**30),
+                ("M", 2**20),
+                ("k", 2**10),
+            ):
+                if n >= 0.9 * k:
+                    return f"{n / k:.2f} {prefix}b"
+            return f"{n}B"
+
+        result = []
+
+        if first:
+            result.append(path)
+
+        if recursion_limit:
+            indent = " " * indent_size
+            contents = self.ls(path, detail=True)
+            contents.sort(
+                key=lambda x: (x.get("type") != "directory", x.get("name", ""))
+            )
+
+            if max_display is not None and len(contents) > max_display:
+                displayed_contents = contents[:max_display]
+                remaining_count = len(contents) - max_display
+            else:
+                displayed_contents = contents
+                remaining_count = 0
+
+            for i, item in enumerate(displayed_contents):
+                is_last_item = (i == len(displayed_contents) - 1) and (
+                    remaining_count == 0
+                )
+
+                branch = (
+                    "└" + ("─" * (indent_size - 2))
+                    if is_last_item
+                    else "├" + ("─" * (indent_size - 2))
+                )
+                branch += " "
+                new_prefix = prefix + (
+                    indent if is_last_item else "│" + " " * (indent_size - 1)
+                )
+
+                name = os.path.basename(item.get("name", ""))
+
+                if display_size and item.get("type") == "directory":
+                    sub_contents = self.ls(item.get("name", ""), detail=True)
+                    num_files = sum(
+                        1 for sub_item in sub_contents if sub_item.get("type") == "file"
+                    )
+                    num_folders = sum(
+                        1
+                        for sub_item in sub_contents
+                        if sub_item.get("type") == "directory"
+                    )
+
+                    if num_files == 0 and num_folders == 0:
+                        size = " (empty folder)"
+                    elif num_files == 0:
+                        size = f" ({num_folders} subfolder{'s' if num_folders > 1 else ''})"
+                    elif num_folders == 0:
+                        size = f" ({num_files} file{'s' if num_files > 1 else ''})"
+                    else:
+                        size = f" ({num_files} file{'s' if num_files > 1 else ''}, {num_folders} subfolder{'s' if num_folders > 1 else ''})"
+                elif display_size and item.get("type") == "file":
+                    size = f" ({format_bytes(item.get('size', 0))})"
+                else:
+                    size = ""
+
+                result.append(f"{prefix}{branch}{name}{size}")
+
+                if item.get("type") == "directory" and recursion_limit > 0:
+                    result.append(
+                        self.tree(
+                            path=item.get("name", ""),
+                            recursion_limit=recursion_limit - 1,
+                            max_display=max_display,
+                            display_size=display_size,
+                            prefix=new_prefix,
+                            is_last=is_last_item,
+                            first=False,
+                            indent_size=indent_size,
+                        )
+                    )
+
+            if remaining_count > 0:
+                more_message = f"{remaining_count} more item(s) not displayed."
+                result.append(
+                    f"{prefix}{'└' + ('─' * (indent_size - 2))} {more_message}"
+                )
+
+        return "\n".join(_ for _ in result if _)
+
     # ------------------------------------------------------------------------
     # Aliases
 
```
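A quick demonstration of the new tree() helper on an in-memory filesystem (output approximate):

```python
import fsspec

fs = fsspec.filesystem("memory")
fs.pipe_file("/proj/src/main.py", b"print('hi')\n")
fs.pipe_file("/proj/README.md", b"# demo\n")

print(fs.tree("/proj", display_size=True))
# /proj
# ├── src (1 file)
# │   └── main.py (12B)
# └── README.md (7B)
```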
```diff
@@ -1733,7 +1875,7 @@ class AbstractBufferedFile(io.IOBase):
 
         self.kwargs = kwargs
 
-        if mode not in {"ab", "rb", "wb"}:
+        if mode not in {"ab", "rb", "wb", "xb"}:
             raise NotImplementedError("File mode not supported")
         if mode == "rb":
             if size is not None:
```
```diff
@@ -1799,7 +1941,7 @@ class AbstractBufferedFile(io.IOBase):
 
     def info(self):
         """File information about this path"""
-        if self.mode == "rb":
+        if self.readable():
             return self.details
         else:
             raise ValueError("Info not available while writing")
```
```diff
@@ -1846,7 +1988,7 @@ class AbstractBufferedFile(io.IOBase):
         data: bytes
             Set of bytes to be written.
         """
-        if self.mode not in {"wb", "ab"}:
+        if not self.writable():
             raise ValueError("File not in write mode")
         if self.closed:
             raise ValueError("I/O operation on closed file.")
```
```diff
@@ -1879,7 +2021,7 @@ class AbstractBufferedFile(io.IOBase):
         if force:
             self.forced = True
 
-        if self.mode not in {"wb", "ab"}:
+        if self.readable():
             # no-op to flush on read-mode
             return
 
```
```diff
@@ -1917,7 +2059,7 @@ class AbstractBufferedFile(io.IOBase):
 
     def _fetch_range(self, start, end):
         """Get the specified set of bytes from remote"""
-        raise NotImplementedError
+        return self.fs.cat_file(self.path, start=start, end=end)
 
     def read(self, length=-1):
         """
```
```diff
@@ -2028,21 +2170,22 @@ class AbstractBufferedFile(io.IOBase):
             return
         if self.closed:
             return
-        if self.mode == "rb":
-            self.cache = None
-        else:
-            if not self.forced:
-                self.flush(force=True)
-
-            if self.fs is not None:
-                self.fs.invalidate_cache(self.path)
-                self.fs.invalidate_cache(self.fs._parent(self.path))
+        try:
+            if self.mode == "rb":
+                self.cache = None
+            else:
+                if not self.forced:
+                    self.flush(force=True)
 
-        self.closed = True
+                if self.fs is not None:
+                    self.fs.invalidate_cache(self.path)
+                    self.fs.invalidate_cache(self.fs._parent(self.path))
+        finally:
+            self.closed = True
 
     def readable(self):
         """Whether opened for reading"""
-        return self.mode == "rb" and not self.closed
+        return "r" in self.mode and not self.closed
 
     def seekable(self):
         """Whether is seekable (only in read mode)"""
```
```diff
@@ -2050,7 +2193,23 @@ class AbstractBufferedFile(io.IOBase):
 
     def writable(self):
         """Whether opened for writing"""
-        return self.mode in {"wb", "ab"} and not self.closed
+        return self.mode in {"wb", "ab", "xb"} and not self.closed
+
+    def __reduce__(self):
+        if self.mode != "rb":
+            raise RuntimeError("Pickling a writeable file is not supported")
+
+        return reopen, (
+            self.fs,
+            self.path,
+            self.mode,
+            self.blocksize,
+            self.loc,
+            self.size,
+            self.autocommit,
+            self.cache.name if self.cache else "none",
+            self.kwargs,
+        )
 
     def __del__(self):
         if not self.closed:
```
```diff
@@ -2066,3 +2225,18 @@ class AbstractBufferedFile(io.IOBase):
 
     def __exit__(self, *args):
         self.close()
+
+
+def reopen(fs, path, mode, blocksize, loc, size, autocommit, cache_type, kwargs):
+    file = fs.open(
+        path,
+        mode=mode,
+        block_size=blocksize,
+        autocommit=autocommit,
+        cache_type=cache_type,
+        size=size,
+        **kwargs,
+    )
+    if loc > 0:
+        file.seek(loc)
+    return file
```
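With `__reduce__` above and this module-level reopen() helper, read-mode AbstractBufferedFile instances (used by most remote backends; local and memory files are different classes) can round-trip through pickle. A sketch with a placeholder URL:

```python
import pickle

import fsspec

# Only "rb" files can be pickled; writeable files raise RuntimeError.
with fsspec.open("https://example.com/data.bin", "rb") as f:  # placeholder URL
    f.read(4)
    clone = pickle.loads(pickle.dumps(f))  # round-trips via reopen(fs, path, ...)
    assert clone.tell() == f.tell()  # reopen() seeks back to loc
```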
fsspec/tests/abstract/__init__.py
CHANGED
```diff
@@ -6,6 +6,8 @@ import pytest
 from fsspec.implementations.local import LocalFileSystem
 from fsspec.tests.abstract.copy import AbstractCopyTests  # noqa: F401
 from fsspec.tests.abstract.get import AbstractGetTests  # noqa: F401
+from fsspec.tests.abstract.open import AbstractOpenTests  # noqa: F401
+from fsspec.tests.abstract.pipe import AbstractPipeTests  # noqa: F401
 from fsspec.tests.abstract.put import AbstractPutTests  # noqa: F401
 
 
@@ -225,7 +227,7 @@ class BaseAbstractFixtures:
         for i in range(10):
             hashed_i = md5(str(i).encode("utf-8")).hexdigest()
             path = some_join(source, f"{hashed_i}.txt")
-            some_fs.pipe(path=path, value=f"{i}".encode("utf-8"))
+            some_fs.pipe(path=path, value=f"{i}".encode())
         return source
 
 
```
fsspec/tests/abstract/open.py
ADDED
```diff
@@ -0,0 +1,11 @@
+import pytest
+
+
+class AbstractOpenTests:
+    def test_open_exclusive(self, fs, fs_target):
+        with fs.open(fs_target, "wb") as f:
+            f.write(b"data")
+        with fs.open(fs_target, "rb") as f:
+            assert f.read() == b"data"
+        with pytest.raises(FileExistsError):
+            fs.open(fs_target, "xb")
```
fsspec/tests/abstract/pipe.py
ADDED
```diff
@@ -0,0 +1,11 @@
+import pytest
+
+
+class AbstractPipeTests:
+    def test_pipe_exclusive(self, fs, fs_target):
+        fs.pipe_file(fs_target, b"data")
+        assert fs.cat_file(fs_target) == b"data"
+        with pytest.raises(FileExistsError):
+            fs.pipe_file(fs_target, b"data", mode="create")
+        fs.pipe_file(fs_target, b"new data", mode="overwrite")
+        assert fs.cat_file(fs_target) == b"new data"
```