fsspec 2024.10.0__py3-none-any.whl → 2024.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/_version.py +2 -2
- fsspec/asyn.py +5 -7
- fsspec/caching.py +34 -19
- fsspec/core.py +15 -13
- fsspec/implementations/asyn_wrapper.py +98 -0
- fsspec/implementations/cached.py +1 -1
- fsspec/implementations/ftp.py +1 -1
- fsspec/implementations/http.py +4 -22
- fsspec/implementations/local.py +6 -1
- fsspec/implementations/memory.py +8 -3
- fsspec/implementations/reference.py +25 -8
- fsspec/implementations/webhdfs.py +2 -1
- fsspec/mapping.py +1 -1
- fsspec/parquet.py +1 -1
- fsspec/registry.py +4 -0
- fsspec/spec.py +206 -30
- fsspec/tests/abstract/__init__.py +3 -1
- fsspec/tests/abstract/open.py +11 -0
- fsspec/tests/abstract/pipe.py +11 -0
- fsspec/utils.py +4 -2
- {fsspec-2024.10.0.dist-info → fsspec-2024.12.0.dist-info}/METADATA +3 -2
- {fsspec-2024.10.0.dist-info → fsspec-2024.12.0.dist-info}/RECORD +24 -21
- {fsspec-2024.10.0.dist-info → fsspec-2024.12.0.dist-info}/WHEEL +1 -1
- {fsspec-2024.10.0.dist-info → fsspec-2024.12.0.dist-info}/licenses/LICENSE +0 -0
fsspec/_version.py
CHANGED
@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '2024.10.0'
-__version_tuple__ = version_tuple = (2024, 10, 0)
+__version__ = version = '2024.12.0'
+__version_tuple__ = version_tuple = (2024, 12, 0)
fsspec/asyn.py
CHANGED
@@ -408,7 +408,7 @@ class AsyncFileSystem(AbstractFileSystem):
                 continue
             raise ex
 
-    async def _pipe_file(self, path, value, **kwargs):
+    async def _pipe_file(self, path, value, mode="overwrite", **kwargs):
        raise NotImplementedError
 
     async def _pipe(self, path, value=None, batch_size=None, **kwargs):
@@ -517,7 +517,7 @@ class AsyncFileSystem(AbstractFileSystem):
             coros, batch_size=batch_size, nofiles=True, return_exceptions=True
         )
 
-    async def _put_file(self, lpath, rpath, **kwargs):
+    async def _put_file(self, lpath, rpath, mode="overwrite", **kwargs):
         raise NotImplementedError
 
     async def _put(
@@ -816,11 +816,9 @@ class AsyncFileSystem(AbstractFileSystem):
                 p: info
                 for p, info in sorted(allpaths.items())
                 if pattern.match(
-                    (
-                        p + "/"
-                        if append_slash_to_dirname and info["type"] == "directory"
-                        else p
-                    )
+                    p + "/"
+                    if append_slash_to_dirname and info["type"] == "directory"
+                    else p
                 )
             }
 
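The new `mode` keyword threaded through `_pipe_file` and `_put_file` is the async half of the exclusive-write support added across this release: "overwrite" keeps the old behaviour, while "create" must refuse existing targets. Below is a minimal sketch of what the widened signature asks of a backend; `KVFileSystem` and its `store` dict are illustrative only, not part of fsspec.

from fsspec.asyn import AsyncFileSystem

class KVFileSystem(AsyncFileSystem):
    """Toy async backend keeping file contents in a dict (illustrative only)."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.store = {}

    async def _pipe_file(self, path, value, mode="overwrite", **kwargs):
        # mode="create" is exclusive: refuse to clobber an existing key
        if mode == "create" and path in self.store:
            raise FileExistsError(path)
        self.store[path] = bytes(value)

kv = KVFileSystem()
kv.pipe_file("/k", b"v")  # sync facade generated by AsyncFileSystem
try:
    kv.pipe_file("/k", b"v2", mode="create")
except FileExistsError:
    print("exclusive create refused to overwrite /k")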
fsspec/caching.py
CHANGED
@@ -8,6 +8,8 @@ import os
 import threading
 import warnings
 from concurrent.futures import Future, ThreadPoolExecutor
+from itertools import groupby
+from operator import itemgetter
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -85,12 +87,7 @@ class BaseCache:
         if self.hit_count == 0 and self.miss_count == 0:
             # a cache that does nothing, this is for logs only
             return ""
-        return " , %s: %d hits, %d misses, %d total requested bytes" % (
-            self.name,
-            self.hit_count,
-            self.miss_count,
-            self.total_requested_bytes,
-        )
+        return f" , {self.name}: {self.hit_count} hits, {self.miss_count} misses, {self.total_requested_bytes} total requested bytes"
 
     def __repr__(self) -> str:
         # TODO: use rich for better formatting
@@ -161,21 +158,39 @@ class MMapCache(BaseCache):
             return b""
         start_block = start // self.blocksize
         end_block = end // self.blocksize
-        need = [i for i in range(start_block, end_block + 1) if i not in self.blocks]
-        hits = [i for i in range(start_block, end_block + 1) if i in self.blocks]
-        self.miss_count += len(need)
-        self.hit_count += len(hits)
-        while need:
-            # TODO: not a for loop so we can consolidate blocks later to
-            # make fewer fetch calls; this could be parallel
-            i = need.pop(0)
-
-            sstart = i * self.blocksize
-            send = min(sstart + self.blocksize, self.size)
+        block_range = range(start_block, end_block + 1)
+        # Determine which blocks need to be fetched. This sequence is sorted by construction.
+        need = (i for i in block_range if i not in self.blocks)
+        # Count the number of blocks already cached
+        self.hit_count += sum(1 for i in block_range if i in self.blocks)
+
+        # Consolidate needed blocks.
+        # Algorithm adapted from Python 2.x itertools documentation.
+        # We are grouping an enumerated sequence of blocks. By comparing when the difference
+        # between an ascending range (provided by enumerate) and the needed block numbers
+        # we can detect when the block number skips values. The key computes this difference.
+        # Whenever the difference changes, we know that we have previously cached block(s),
+        # and a new group is started. In other words, this algorithm neatly groups
+        # runs of consecutive block numbers so they can be fetched together.
+        for _, _blocks in groupby(enumerate(need), key=lambda x: x[0] - x[1]):
+            # Extract the blocks from the enumerated sequence
+            _blocks = tuple(map(itemgetter(1), _blocks))
+            # Compute start of first block
+            sstart = _blocks[0] * self.blocksize
+            # Compute the end of the last block. Last block may not be full size.
+            send = min(_blocks[-1] * self.blocksize + self.blocksize, self.size)
+
+            # Fetch bytes (could be multiple consecutive blocks)
             self.total_requested_bytes += send - sstart
-            logger.debug(f"MMap get block #{i} ({sstart}-{send})")
+            logger.debug(
+                f"MMap get blocks {_blocks[0]}-{_blocks[-1]} ({sstart}-{send})"
+            )
             self.cache[sstart:send] = self.fetcher(sstart, send)
-            self.blocks.add(i)
+
+            # Update set of cached blocks
+            self.blocks.update(_blocks)
+            # Update cache statistics with number of blocks we had to cache
+            self.miss_count += len(_blocks)
 
         return self.cache[start:end]
 
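The consolidation trick described in the comments above is easier to see in isolation: consecutive integers keep a constant difference from their enumeration index, so `groupby` starts a new group exactly where a block number is skipped. A standalone demonstration:

from itertools import groupby
from operator import itemgetter

# Runs of consecutive block numbers share a constant (index - value)
# difference, so groupby splits wherever the sequence skips a value.
need = [3, 4, 5, 9, 10, 14]
runs = [
    tuple(map(itemgetter(1), grp))
    for _, grp in groupby(enumerate(need), key=lambda x: x[0] - x[1])
]
print(runs)  # [(3, 4, 5), (9, 10), (14,)]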
fsspec/core.py
CHANGED
@@ -329,12 +329,19 @@ def open_files(
 
 
 def _un_chain(path, kwargs):
-    x = re.compile(".*[^a-z]+.*")  # test for non protocol-like single word
-    bits = (
-        [p if "://" in p or x.match(p) else p + "://" for p in path.split("::")]
-        if "::" in path
-        else [path]
-    )
+    # Avoid a circular import
+    from fsspec.implementations.cached import CachingFileSystem
+
+    if "::" in path:
+        x = re.compile(".*[^a-z]+.*")  # test for non protocol-like single word
+        bits = []
+        for p in path.split("::"):
+            if "://" in p or x.match(p):
+                bits.append(p)
+            else:
+                bits.append(p + "://")
+    else:
+        bits = [path]
     # [[url, protocol, kwargs], ...]
     out = []
     previous_bit = None
@@ -351,10 +358,7 @@ def _un_chain(path, kwargs):
             **kws,
         )
         bit = cls._strip_protocol(bit)
-        if (
-            protocol in {"blockcache", "filecache", "simplecache"}
-            and "target_protocol" not in kw
-        ):
+        if "target_protocol" not in kw and issubclass(cls, CachingFileSystem):
             bit = previous_bit
         out.append((bit, protocol, kw))
         previous_bit = bit
@@ -676,9 +680,7 @@ def get_fs_token_paths(
     elif not isinstance(paths, list):
         paths = list(paths)
     else:
-        if "w" in mode and expand:
-            paths = _expand_paths(paths, name_function, num)
-        elif "x" in mode and expand:
+        if ("w" in mode or "x" in mode) and expand:
             paths = _expand_paths(paths, name_function, num)
         elif "*" in paths:
             paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)]
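To see what the rewritten `_un_chain` produces, it can be called directly (it is a private helper, so this is purely illustrative): each `::`-separated segment of a chained URL becomes a `(path, protocol, kwargs)` triple, and with this change any `CachingFileSystem` subclass, not just the three hard-coded protocols, inherits the path of the layer it wraps.

import fsspec.core

out = fsspec.core._un_chain(
    "simplecache::zip://inner.txt::file:///tmp/archive.zip", {}
)
for path, protocol, kwargs in out:
    print(f"{protocol}: {path!r}")
# The simplecache layer has no path of its own, so it picks up the
# path produced by the zip layer it wraps.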
fsspec/implementations/asyn_wrapper.py
ADDED
@@ -0,0 +1,98 @@
+import asyncio
+import functools
+import inspect
+
+from fsspec.asyn import AsyncFileSystem
+
+
+def async_wrapper(func, obj=None):
+    """
+    Wraps a synchronous function to make it awaitable.
+
+    Parameters
+    ----------
+    func : callable
+        The synchronous function to wrap.
+    obj : object, optional
+        The instance to bind the function to, if applicable.
+
+    Returns
+    -------
+    coroutine
+        An awaitable version of the function.
+    """
+
+    @functools.wraps(func)
+    async def wrapper(*args, **kwargs):
+        return await asyncio.to_thread(func, *args, **kwargs)
+
+    return wrapper
+
+
+class AsyncFileSystemWrapper(AsyncFileSystem):
+    """
+    A wrapper class to convert a synchronous filesystem into an asynchronous one.
+
+    This class takes an existing synchronous filesystem implementation and wraps all
+    its methods to provide an asynchronous interface.
+
+    Parameters
+    ----------
+    sync_fs : AbstractFileSystem
+        The synchronous filesystem instance to wrap.
+    """
+
+    def __init__(self, sync_fs, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.asynchronous = True
+        self.sync_fs = sync_fs
+        self.protocol = self.sync_fs.protocol
+        self._wrap_all_sync_methods()
+
+    @property
+    def fsid(self):
+        return f"async_{self.sync_fs.fsid}"
+
+    def _wrap_all_sync_methods(self):
+        """
+        Wrap all synchronous methods of the underlying filesystem with asynchronous versions.
+        """
+        for method_name in dir(self.sync_fs):
+            if method_name.startswith("_"):
+                continue
+
+            attr = inspect.getattr_static(self.sync_fs, method_name)
+            if isinstance(attr, property):
+                continue
+
+            method = getattr(self.sync_fs, method_name)
+            if callable(method) and not asyncio.iscoroutinefunction(method):
+                async_method = async_wrapper(method, obj=self)
+                setattr(self, f"_{method_name}", async_method)
+
+    @classmethod
+    def wrap_class(cls, sync_fs_class):
+        """
+        Create a new class that can be used to instantiate an AsyncFileSystemWrapper
+        with lazy instantiation of the underlying synchronous filesystem.
+
+        Parameters
+        ----------
+        sync_fs_class : type
+            The class of the synchronous filesystem to wrap.
+
+        Returns
+        -------
+        type
+            A new class that wraps the provided synchronous filesystem class.
+        """
+
+        class GeneratedAsyncFileSystemWrapper(cls):
+            def __init__(self, *args, **kwargs):
+                sync_fs = sync_fs_class(*args, **kwargs)
+                super().__init__(sync_fs)
+
+        GeneratedAsyncFileSystemWrapper.__name__ = (
+            f"Async{sync_fs_class.__name__}Wrapper"
+        )
+        return GeneratedAsyncFileSystemWrapper
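A rough usage sketch for the new wrapper: every public synchronous method of the wrapped filesystem is re-exposed as an underscore-prefixed coroutine (`ls` becomes `_ls`, `cat_file` becomes `_cat_file`, and so on), each dispatched to a worker thread via `asyncio.to_thread`.

import asyncio

import fsspec
from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper

async def main():
    # MemoryFileSystem is synchronous; the wrapper gives it async methods
    afs = AsyncFileSystemWrapper(fsspec.filesystem("memory"))
    await afs._pipe_file("/demo.txt", b"hello")
    print(await afs._cat_file("/demo.txt"))  # b'hello'

asyncio.run(main())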
fsspec/implementations/cached.py
CHANGED
@@ -612,7 +612,7 @@ class WholeFileCacheFileSystem(CachingFileSystem):
         **kwargs,
     ):
         paths = self.expand_path(
-            path, recursive=recursive, maxdepth=kwargs.get("maxdepth", None)
+            path, recursive=recursive, maxdepth=kwargs.get("maxdepth")
         )
         getpaths = []
         storepaths = []
fsspec/implementations/ftp.py
CHANGED
fsspec/implementations/http.py
CHANGED
@@ -273,8 +273,12 @@ class HTTPFileSystem(AsyncFileSystem):
         chunk_size=5 * 2**20,
         callback=DEFAULT_CALLBACK,
         method="post",
+        mode="overwrite",
         **kwargs,
     ):
+        if mode != "overwrite":
+            raise NotImplementedError("Exclusive write")
+
         async def gen_chunks():
             # Support passing arbitrary file-like objects
             # and use them instead of streams.
@@ -692,25 +696,6 @@ class HTTPFile(AbstractBufferedFile):
 
     _fetch_range = sync_wrapper(async_fetch_range)
 
-    def __reduce__(self):
-        return (
-            reopen,
-            (
-                self.fs,
-                self.url,
-                self.mode,
-                self.blocksize,
-                self.cache.name if self.cache else "none",
-                self.size,
-            ),
-        )
-
-
-def reopen(fs, url, mode, blocksize, cache_type, size=None):
-    return fs.open(
-        url, mode=mode, block_size=blocksize, cache_type=cache_type, size=size
-    )
-
 
 magic_check = re.compile("([*[])")
 
@@ -760,9 +745,6 @@ class HTTPStreamFile(AbstractBufferedFile):
         asyncio.run_coroutine_threadsafe(self._close(), self.loop)
         super().close()
 
-    def __reduce__(self):
-        return reopen, (self.fs, self.url, self.mode, self.blocksize, self.cache.name)
-
 
 class AsyncStreamFile(AbstractAsyncStreamedFile):
     def __init__(
fsspec/implementations/local.py
CHANGED
@@ -60,7 +60,12 @@ class LocalFileSystem(AbstractFileSystem):
         info = self.info(path)
         if info["type"] == "directory":
             with os.scandir(path) as it:
-                infos = [self.info(f) for f in it]
+                infos = []
+                for f in it:
+                    try:
+                        infos.append(self.info(f))
+                    except FileNotFoundError:
+                        pass
         else:
             infos = [info]
 
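The motivation, in miniature: an entry yielded by `os.scandir` can be deleted by another process before it is stat'ed, so `ls` now skips entries that vanish mid-listing instead of failing the whole call. The same pattern outside of fsspec:

import os

def tolerant_listing(path):
    # Mirror of the fix above: ignore entries deleted between the
    # directory scan and the per-entry stat call.
    infos = []
    with os.scandir(path) as it:
        for entry in it:
            try:
                infos.append((entry.path, entry.stat().st_size))
            except FileNotFoundError:
                pass  # raced with a concurrent delete
    return infos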
fsspec/implementations/memory.py
CHANGED
@@ -126,12 +126,13 @@ class MemoryFileSystem(AbstractFileSystem):
             if not exist_ok:
                 raise
 
-    def pipe_file(self, path, value, **kwargs):
+    def pipe_file(self, path, value, mode="overwrite", **kwargs):
         """Set the bytes of given file
 
         Avoids copies of the data if possible
         """
-        self.open(path, mode="wb", data=value)
+        mode = "xb" if mode == "create" else "wb"
+        self.open(path, mode=mode, data=value)
 
     def rmdir(self, path):
         path = self._strip_protocol(path)
@@ -178,6 +179,8 @@ class MemoryFileSystem(AbstractFileSystem):
         **kwargs,
     ):
         path = self._strip_protocol(path)
+        if "x" in mode and self.exists(path):
+            raise FileExistsError
         if path in self.pseudo_dirs:
             raise IsADirectoryError(path)
         parent = path
@@ -197,7 +200,9 @@ class MemoryFileSystem(AbstractFileSystem):
                 return f
             else:
                 raise FileNotFoundError(path)
-        elif mode == "wb":
+        elif mode in {"wb", "xb"}:
+            if mode == "xb" and self.exists(path):
+                raise FileExistsError
             m = MemoryFile(self, path, kwargs.get("data"))
             if not self._intrans:
                 m.commit()
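These changes make MemoryFileSystem the simplest backend to try the new exclusive-write behaviour against; both entry points now raise `FileExistsError`:

import fsspec

fs = fsspec.filesystem("memory")
fs.pipe_file("/x.bin", b"data")

try:
    fs.open("/x.bin", "xb")
except FileExistsError:
    print("open(..., 'xb') refused the existing file")

try:
    fs.pipe_file("/x.bin", b"data", mode="create")
except FileExistsError:
    print("pipe_file(mode='create') refused the existing file")

fs.pipe_file("/x.bin", b"new data", mode="overwrite")  # still allowed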
fsspec/implementations/reference.py
CHANGED
@@ -5,8 +5,8 @@ import itertools
 import logging
 import math
 import os
-from itertools import chain
 from functools import lru_cache
+from itertools import chain
 from typing import TYPE_CHECKING, Literal
 
 import fsspec.core
@@ -20,6 +20,7 @@ except ImportError:
 from fsspec.asyn import AsyncFileSystem
 from fsspec.callbacks import DEFAULT_CALLBACK
 from fsspec.core import filesystem, open, split_protocol
+from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper
 from fsspec.utils import isfilelike, merge_offset_ranges, other_paths
 
 logger = logging.getLogger("fsspec.reference")
@@ -41,7 +42,7 @@ def _first(d):
 
 def _prot_in_references(path, references):
     ref = references.get(path)
-    if isinstance(ref, (list, tuple)):
+    if isinstance(ref, (list, tuple)) and isinstance(ref[0], str):
         return split_protocol(ref[0])[0] if ref[0] else ref[0]
 
 
@@ -173,8 +174,11 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
             """cached parquet file loader"""
             path = self.url.format(field=field, record=record)
             data = io.BytesIO(self.fs.cat_file(path))
-            df = self.pd.read_parquet(data, engine=self.engine)
-            refs = {c: df[c].to_numpy() for c in df.columns}
+            try:
+                df = self.pd.read_parquet(data, engine=self.engine)
+                refs = {c: df[c].to_numpy() for c in df.columns}
+            except OSError:
+                refs = None
             return refs
 
         self.open_refs = open_refs
@@ -428,7 +432,7 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
         if len(partition) < self.record_size:
             try:
                 original = self.open_refs(field, record)
-            except IOError:
+            except OSError:
                 pass
 
         if original:
@@ -754,6 +758,10 @@ class ReferenceFileSystem(AsyncFileSystem):
             self.fss[remote_protocol] = fs
 
         self.fss[None] = fs or filesystem("file")  # default one
+        # Wrap any non-async filesystems to ensure async methods are available below
+        for k, f in self.fss.items():
+            if not f.async_impl:
+                self.fss[k] = AsyncFileSystemWrapper(f)
 
     def _cat_common(self, path, start=None, end=None):
         path = self._strip_protocol(path)
@@ -803,7 +811,9 @@ class ReferenceFileSystem(AsyncFileSystem):
             return part_or_url[start:end]
         protocol, _ = split_protocol(part_or_url)
         try:
-            await self.fss[protocol]._cat_file(part_or_url, start=start0, end=end0)
+            return await self.fss[protocol]._cat_file(
+                part_or_url, start=start0, end=end0
+            )
         except Exception as e:
             raise ReferenceNotReachable(path, part_or_url) from e
 
@@ -871,6 +881,9 @@ class ReferenceFileSystem(AsyncFileSystem):
                 # found and on_error is "raise"
                 try:
                     u, s, e = self._cat_common(p)
+                    if not isinstance(u, (bytes, str)):
+                        # nan/None from parquet
+                        continue
                 except FileNotFoundError as err:
                     if on_error == "raise":
                         raise
@@ -1173,13 +1186,17 @@ class ReferenceFileSystem(AsyncFileSystem):
         )  # ignores FileNotFound, just as well for directories
         self.dircache.clear()  # this is a bit heavy handed
 
-    async def _pipe_file(self, path, data):
+    async def _pipe_file(self, path, data, mode="overwrite", **kwargs):
+        if mode == "create" and self.exists(path):
+            raise FileExistsError
         # can be str or bytes
         self.references[path] = data
         self.dircache.clear()  # this is a bit heavy handed
 
-    async def _put_file(self, lpath, rpath, **kwargs):
+    async def _put_file(self, lpath, rpath, mode="overwrite", **kwargs):
         # puts binary
+        if mode == "create" and self.exists(rpath):
+            raise FileExistsError
         with open(lpath, "rb") as f:
             self.references[rpath] = f.read()
         self.dircache.clear()  # this is a bit heavy handed
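Taken together, these changes let a reference filesystem sit on top of a purely synchronous target, since any non-async member of `self.fss` is now wrapped in `AsyncFileSystemWrapper`. A small sketch under that assumption; the reference dict and file names are made up for illustration:

import fsspec

mem = fsspec.filesystem("memory")
mem.pipe_file("/target.bin", b"0123456789")

refs = {
    "a": b"literal bytes",                # inline content
    "b": ["memory://target.bin", 0, 4],   # [url, offset, length]
}
fs = fsspec.filesystem("reference", fo=refs, remote_protocol="memory")
print(fs.cat("a"))  # b'literal bytes'
print(fs.cat("b"))  # b'0123' -- served via the async-wrapped memory fs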
fsspec/implementations/webhdfs.py
CHANGED
@@ -166,7 +166,8 @@ class WebHDFS(AbstractFileSystem):
             self.session.auth = HTTPBasicAuth(self.user, self.password)
 
     def _call(self, op, method="get", path=None, data=None, redirect=True, **kwargs):
-        url = self._apply_proxy(self.url + quote(path or "", safe="/="))
+        path = self._strip_protocol(path) if path is not None else ""
+        url = self._apply_proxy(self.url + quote(path, safe="/="))
         args = kwargs.copy()
         args.update(self.pars)
         args["op"] = op.upper()
fsspec/mapping.py
CHANGED
@@ -112,7 +112,7 @@ class FSMap(MutableMapping):
             for k, v in out.items()
         }
         return {
-            key: out[k2]
+            key: out[k2] if on_error == "raise" else out.get(k2, KeyError(k2))
             for key, k2 in zip(keys, keys2)
             if on_error == "return" or not isinstance(out[k2], BaseException)
         }
fsspec/parquet.py
CHANGED
fsspec/registry.py
CHANGED
@@ -202,6 +202,10 @@ known_implementations = {
         "err": 'SFTPFileSystem requires "paramiko" to be installed',
     },
     "tar": {"class": "fsspec.implementations.tar.TarFileSystem"},
+    "tosfs": {
+        "class": "tosfs.TosFileSystem",
+        "err": "Install tosfs to access ByteDance volcano engine Tinder Object Storage",
+    },
     "wandb": {"class": "wandbfs.WandbFS", "err": "Install wandbfs to access wandb"},
     "webdav": {
         "class": "webdav4.fsspec.WebdavFileSystem",
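Entries in `known_implementations` are lazy: the backing package is imported only when the protocol is first requested, and the `err` text is shown if that import fails. Third-party filesystems can hook in the same way at runtime; `myproto` and `mypkg` below are placeholders:

from fsspec.registry import known_implementations, register_implementation

print(known_implementations["tosfs"]["class"])  # tosfs.TosFileSystem

# Runtime registration of a hypothetical backend:
# register_implementation(
#     "myproto", "mypkg.MyFileSystem", errtxt="Install mypkg for myproto support"
# )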
fsspec/spec.py
CHANGED
@@ -10,7 +10,7 @@ import weakref
 from errno import ESPIPE
 from glob import has_magic
 from hashlib import sha256
-from typing import Any, ClassVar, Dict, Tuple
+from typing import Any, ClassVar
 
 from .callbacks import DEFAULT_CALLBACK
 from .config import apply_config, conf
@@ -117,8 +117,8 @@ class AbstractFileSystem(metaclass=_Cached):
     _extra_tokenize_attributes = ()
 
     # Set by _Cached metaclass
-    storage_args: Tuple[Any, ...]
-    storage_options: Dict[str, Any]
+    storage_args: tuple[Any, ...]
+    storage_options: dict[str, Any]
 
     def __init__(self, *args, **storage_options):
         """Create and configure file-system instance
@@ -408,7 +408,7 @@ class AbstractFileSystem(metaclass=_Cached):
         topdown: bool (True)
             Whether to walk the directory tree from the top downwards or from
             the bottom upwards.
-        on_error: "omit", "raise", a collable
+        on_error: "omit", "raise", a callable
            if omit (default), path with exception will simply be empty;
            If raise, an underlying exception will be raised;
            if callable, it will be called with a single OSError instance as argument
@@ -615,11 +615,9 @@ class AbstractFileSystem(metaclass=_Cached):
             p: info
             for p, info in sorted(allpaths.items())
             if pattern.match(
-                (
-                    p + "/"
-                    if append_slash_to_dirname and info["type"] == "directory"
-                    else p
-                )
+                p + "/"
+                if append_slash_to_dirname and info["type"] == "directory"
+                else p
             )
         }
 
@@ -780,8 +778,12 @@ class AbstractFileSystem(metaclass=_Cached):
             return f.read(end - f.tell())
         return f.read()
 
-    def pipe_file(self, path, value, **kwargs):
+    def pipe_file(self, path, value, mode="overwrite", **kwargs):
         """Set the bytes of given file"""
+        if mode == "create" and self.exists(path):
+            # non-atomic but simple way; or could use "xb" in open(), which is likely
+            # not as well supported
+            raise FileExistsError
         with self.open(path, "wb", **kwargs) as f:
             f.write(value)
 
@@ -973,8 +975,12 @@ class AbstractFileSystem(metaclass=_Cached):
             with callback.branched(rpath, lpath) as child:
                 self.get_file(rpath, lpath, callback=child, **kwargs)
 
-    def put_file(self, lpath, rpath, callback=DEFAULT_CALLBACK, **kwargs):
+    def put_file(
+        self, lpath, rpath, callback=DEFAULT_CALLBACK, mode="overwrite", **kwargs
+    ):
         """Copy single file to remote"""
+        if mode == "create" and self.exists(rpath):
+            raise FileExistsError
         if os.path.isdir(lpath):
             self.makedirs(rpath, exist_ok=True)
             return None
@@ -1264,6 +1270,9 @@ class AbstractFileSystem(metaclass=_Cached):
             Target file
         mode: str like 'rb', 'w'
             See builtin ``open()``
+            Mode "x" (exclusive write) may be implemented by the backend. Even if
+            it is, whether it is checked up front or on commit, and whether it is
+            atomic is implementation-dependent.
         block_size: int
             Some indication of buffering - this is a value in bytes
         cache_options : dict, optional
@@ -1442,7 +1451,7 @@ class AbstractFileSystem(metaclass=_Cached):
 
         return json.loads(blob, cls=FilesystemJSONDecoder)
 
-    def to_dict(self, *, include_password: bool = True) -> Dict[str, Any]:
+    def to_dict(self, *, include_password: bool = True) -> dict[str, Any]:
         """
         JSON-serializable dictionary representation of this filesystem instance.
 
@@ -1483,7 +1492,7 @@ class AbstractFileSystem(metaclass=_Cached):
         )
 
     @staticmethod
-    def from_dict(dct: Dict[str, Any]) -> AbstractFileSystem:
+    def from_dict(dct: dict[str, Any]) -> AbstractFileSystem:
         """
         Recreate a filesystem instance from dictionary representation.
 
@@ -1567,6 +1576,141 @@ class AbstractFileSystem(metaclass=_Cached):
         """Return the modified timestamp of a file as a datetime.datetime"""
         raise NotImplementedError
 
+    def tree(
+        self,
+        path: str = "/",
+        recursion_limit: int = 2,
+        max_display: int = 25,
+        display_size: bool = False,
+        prefix: str = "",
+        is_last: bool = True,
+        first: bool = True,
+        indent_size: int = 4,
+    ) -> str:
+        """
+        Return a tree-like structure of the filesystem starting from the given path as a string.
+
+        Parameters
+        ----------
+        path: Root path to start traversal from
+        recursion_limit: Maximum depth of directory traversal
+        max_display: Maximum number of items to display per directory
+        display_size: Whether to display file sizes
+        prefix: Current line prefix for visual tree structure
+        is_last: Whether current item is last in its level
+        first: Whether this is the first call (displays root path)
+        indent_size: Number of spaces by indent
+
+        Returns
+        -------
+        str: A string representing the tree structure.
+
+        Example
+        -------
+        >>> from fsspec import filesystem
+
+        >>> fs = filesystem('ftp', host='test.rebex.net', user='demo', password='password')
+        >>> tree = fs.tree(display_size=True, recursion_limit=3, indent_size=8, max_display=10)
+        >>> print(tree)
+        """
+
+        def format_bytes(n: int) -> str:
+            """Format bytes as text."""
+            for prefix, k in (
+                ("P", 2**50),
+                ("T", 2**40),
+                ("G", 2**30),
+                ("M", 2**20),
+                ("k", 2**10),
+            ):
+                if n >= 0.9 * k:
+                    return f"{n / k:.2f} {prefix}b"
+            return f"{n}B"
+
+        result = []
+
+        if first:
+            result.append(path)
+
+        if recursion_limit:
+            indent = " " * indent_size
+            contents = self.ls(path, detail=True)
+            contents.sort(
+                key=lambda x: (x.get("type") != "directory", x.get("name", ""))
+            )
+
+            if max_display is not None and len(contents) > max_display:
+                displayed_contents = contents[:max_display]
+                remaining_count = len(contents) - max_display
+            else:
+                displayed_contents = contents
+                remaining_count = 0
+
+            for i, item in enumerate(displayed_contents):
+                is_last_item = (i == len(displayed_contents) - 1) and (
+                    remaining_count == 0
+                )
+
+                branch = (
+                    "└" + ("─" * (indent_size - 2))
+                    if is_last_item
+                    else "├" + ("─" * (indent_size - 2))
+                )
+                branch += " "
+                new_prefix = prefix + (
+                    indent if is_last_item else "│" + " " * (indent_size - 1)
+                )
+
+                name = os.path.basename(item.get("name", ""))
+
+                if display_size and item.get("type") == "directory":
+                    sub_contents = self.ls(item.get("name", ""), detail=True)
+                    num_files = sum(
+                        1 for sub_item in sub_contents if sub_item.get("type") == "file"
+                    )
+                    num_folders = sum(
+                        1
+                        for sub_item in sub_contents
+                        if sub_item.get("type") == "directory"
+                    )
+
+                    if num_files == 0 and num_folders == 0:
+                        size = " (empty folder)"
+                    elif num_files == 0:
+                        size = f" ({num_folders} subfolder{'s' if num_folders > 1 else ''})"
+                    elif num_folders == 0:
+                        size = f" ({num_files} file{'s' if num_files > 1 else ''})"
+                    else:
+                        size = f" ({num_files} file{'s' if num_files > 1 else ''}, {num_folders} subfolder{'s' if num_folders > 1 else ''})"
+                elif display_size and item.get("type") == "file":
+                    size = f" ({format_bytes(item.get('size', 0))})"
+                else:
+                    size = ""
+
+                result.append(f"{prefix}{branch}{name}{size}")
+
+                if item.get("type") == "directory" and recursion_limit > 0:
+                    result.append(
+                        self.tree(
+                            path=item.get("name", ""),
+                            recursion_limit=recursion_limit - 1,
+                            max_display=max_display,
+                            display_size=display_size,
+                            prefix=new_prefix,
+                            is_last=is_last_item,
+                            first=False,
+                            indent_size=indent_size,
+                        )
+                    )
+
+            if remaining_count > 0:
+                more_message = f"{remaining_count} more item(s) not displayed."
+                result.append(
+                    f"{prefix}{'└' + ('─' * (indent_size - 2))} {more_message}"
+                )
+
+        return "\n".join(_ for _ in result if _)
+
     # ------------------------------------------------------------------------
     # Aliases
 
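A quick spin of the new `tree()` helper on a throwaway memory filesystem (requires fsspec >= 2024.12.0; the exact rendering depends on the backend's `ls` details):

import fsspec

fs = fsspec.filesystem("memory")
fs.pipe_file("/proj/src/main.py", b"print('hi')")
fs.pipe_file("/proj/README.md", b"# demo")
print(fs.tree("/proj", display_size=True))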
@@ -1731,7 +1875,7 @@ class AbstractBufferedFile(io.IOBase):
 
         self.kwargs = kwargs
 
-        if mode not in {"ab", "rb", "wb"}:
+        if mode not in {"ab", "rb", "wb", "xb"}:
             raise NotImplementedError("File mode not supported")
         if mode == "rb":
             if size is not None:
@@ -1797,7 +1941,7 @@ class AbstractBufferedFile(io.IOBase):
 
     def info(self):
         """File information about this path"""
-        if self.mode == "rb":
+        if self.readable():
             return self.details
         else:
             raise ValueError("Info not available while writing")
@@ -1844,7 +1988,7 @@ class AbstractBufferedFile(io.IOBase):
         data: bytes
             Set of bytes to be written.
         """
-        if self.mode not in {"wb", "ab"}:
+        if not self.writable():
             raise ValueError("File not in write mode")
         if self.closed:
             raise ValueError("I/O operation on closed file.")
@@ -1877,7 +2021,7 @@ class AbstractBufferedFile(io.IOBase):
         if force:
             self.forced = True
 
-        if self.mode == "rb":
+        if self.readable():
             # no-op to flush on read-mode
             return
 
@@ -1915,7 +2059,7 @@ class AbstractBufferedFile(io.IOBase):
 
     def _fetch_range(self, start, end):
         """Get the specified set of bytes from remote"""
-        raise NotImplementedError
+        return self.fs.cat_file(self.path, start=start, end=end)
 
     def read(self, length=-1):
         """
@@ -2026,21 +2170,22 @@ class AbstractBufferedFile(io.IOBase):
             return
         if self.closed:
             return
-        if self.mode == "rb":
-            self.cache = None
-        else:
-            if not self.forced:
-                self.flush(force=True)
-
-            if self.fs is not None:
-                self.fs.invalidate_cache(self.path)
-                self.fs.invalidate_cache(self.fs._parent(self.path))
-
-        self.closed = True
+        try:
+            if self.mode == "rb":
+                self.cache = None
+            else:
+                if not self.forced:
+                    self.flush(force=True)
+
+                if self.fs is not None:
+                    self.fs.invalidate_cache(self.path)
+                    self.fs.invalidate_cache(self.fs._parent(self.path))
+        finally:
+            self.closed = True
 
     def readable(self):
         """Whether opened for reading"""
-        return self.mode == "rb" and not self.closed
+        return "r" in self.mode and not self.closed
 
     def seekable(self):
         """Whether is seekable (only in read mode)"""
@@ -2048,7 +2193,23 @@ class AbstractBufferedFile(io.IOBase):
 
     def writable(self):
         """Whether opened for writing"""
-        return self.mode in {"wb", "ab"} and not self.closed
+        return self.mode in {"wb", "ab", "xb"} and not self.closed
+
+    def __reduce__(self):
+        if self.mode != "rb":
+            raise RuntimeError("Pickling a writeable file is not supported")
+
+        return reopen, (
+            self.fs,
+            self.path,
+            self.mode,
+            self.blocksize,
+            self.loc,
+            self.size,
+            self.autocommit,
+            self.cache.name if self.cache else "none",
+            self.kwargs,
+        )
 
     def __del__(self):
         if not self.closed:
@@ -2064,3 +2225,18 @@ class AbstractBufferedFile(io.IOBase):
 
     def __exit__(self, *args):
         self.close()
+
+
+def reopen(fs, path, mode, blocksize, loc, size, autocommit, cache_type, kwargs):
+    file = fs.open(
+        path,
+        mode=mode,
+        block_size=blocksize,
+        autocommit=autocommit,
+        cache_type=cache_type,
+        size=size,
+        **kwargs,
+    )
+    if loc > 0:
+        file.seek(loc)
+    return file
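The `__reduce__`/`reopen` pair generalises the pickling support that was previously private to the HTTP file classes (removed above): a read-mode file pickles as its open parameters plus the current offset, and is rebuilt by reopening the path and seeking back. A toy backend to exercise the contract; `ToyFS` is illustrative only:

import pickle

from fsspec.spec import AbstractBufferedFile, AbstractFileSystem

class ToyFS(AbstractFileSystem):
    protocol = "toy"
    store = {"/blob": b"0123456789"}

    def cat_file(self, path, start=None, end=None, **kwargs):
        return self.store[path][start:end]

    def info(self, path, **kwargs):
        return {"name": path, "size": len(self.store[path]), "type": "file"}

    def _open(self, path, mode="rb", block_size=None, **kwargs):
        # AbstractBufferedFile now fetches ranges via fs.cat_file by default
        return AbstractBufferedFile(self, path, mode, block_size or 64, **kwargs)

fs = ToyFS()
f = fs.open("/blob", "rb")
f.seek(4)
g = pickle.loads(pickle.dumps(f))  # reopened and seeked back to loc=4
print(g.read())  # b'456789'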
fsspec/tests/abstract/__init__.py
CHANGED
@@ -6,6 +6,8 @@ import pytest
 from fsspec.implementations.local import LocalFileSystem
 from fsspec.tests.abstract.copy import AbstractCopyTests  # noqa: F401
 from fsspec.tests.abstract.get import AbstractGetTests  # noqa: F401
+from fsspec.tests.abstract.open import AbstractOpenTests  # noqa: F401
+from fsspec.tests.abstract.pipe import AbstractPipeTests  # noqa: F401
 from fsspec.tests.abstract.put import AbstractPutTests  # noqa: F401
 
 
@@ -225,7 +227,7 @@ class BaseAbstractFixtures:
         for i in range(10):
             hashed_i = md5(str(i).encode("utf-8")).hexdigest()
             path = some_join(source, f"{hashed_i}.txt")
-            some_fs.pipe(path=path, value=f"{i}".encode("utf-8"))
+            some_fs.pipe(path=path, value=f"{i}".encode())
         return source
 
 
fsspec/tests/abstract/open.py
ADDED
@@ -0,0 +1,11 @@
+import pytest
+
+
+class AbstractOpenTests:
+    def test_open_exclusive(self, fs, fs_target):
+        with fs.open(fs_target, "wb") as f:
+            f.write(b"data")
+        with fs.open(fs_target, "rb") as f:
+            assert f.read() == b"data"
+        with pytest.raises(FileExistsError):
+            fs.open(fs_target, "xb")
fsspec/tests/abstract/pipe.py
ADDED
@@ -0,0 +1,11 @@
+import pytest
+
+
+class AbstractPipeTests:
+    def test_pipe_exclusive(self, fs, fs_target):
+        fs.pipe_file(fs_target, b"data")
+        assert fs.cat_file(fs_target) == b"data"
+        with pytest.raises(FileExistsError):
+            fs.pipe_file(fs_target, b"data", mode="create")
+        fs.pipe_file(fs_target, b"new data", mode="overwrite")
+        assert fs.cat_file(fs_target) == b"new data"
fsspec/utils.py
CHANGED
@@ -4,7 +4,6 @@ import contextlib
 import logging
 import math
 import os
-import pathlib
 import re
 import sys
 import tempfile
@@ -24,6 +23,8 @@ from typing import (
 from urllib.parse import urlsplit
 
 if TYPE_CHECKING:
+    import pathlib
+
     from typing_extensions import TypeGuard
 
     from fsspec.spec import AbstractFileSystem
@@ -82,7 +83,8 @@ def infer_storage_options(
     # https://msdn.microsoft.com/en-us/library/jj710207.aspx
     windows_path = re.match(r"^/([a-zA-Z])[:|]([\\/].*)$", path)
    if windows_path:
-        path = "%s:%s" % windows_path.groups()
+        drive, path = windows_path.groups()
+        path = f"{drive}:{path}"
 
     if protocol in ["http", "https"]:
         # for HTTP, we don't want to parse, as requests will anyway
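A quick check of the rewritten drive-letter branch (behaviour unchanged, just clearer than the old `%`-formatting):

from fsspec.utils import infer_storage_options

opts = infer_storage_options("file:///C:/Users/demo/data.csv")
print(opts["protocol"], opts["path"])  # file C:/Users/demo/data.csv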
{fsspec-2024.10.0.dist-info → fsspec-2024.12.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: fsspec
-Version: 2024.10.0
+Version: 2024.12.0
 Summary: File-system specification
 Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
 Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
@@ -46,6 +46,7 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Requires-Python: >=3.8
 Provides-Extra: abfs
 Requires-Dist: adlfs; extra == 'abfs'
{fsspec-2024.10.0.dist-info → fsspec-2024.12.0.dist-info}/RECORD
CHANGED
(old hashes were elided in the source diff and are left blank here)
@@ -1,55 +1,58 @@
 fsspec/__init__.py,sha256=l9MJaNNV2d4wKpCtMvXDr55n92DkdrAayGy3F9ICjzk,1998
-fsspec/_version.py,sha256=
+fsspec/_version.py,sha256=wQ2VhCCZZrkDgAic2RGrr4PbmFQdFL4PTIWVnK3r9tM,419
 fsspec/archive.py,sha256=S__DzfZj-urAN3tp2W6jJ6YDiXG1fAl7FjvWUN73qIE,2386
-fsspec/asyn.py,sha256=
-fsspec/caching.py,sha256=
+fsspec/asyn.py,sha256=rsnCsFUmBZmKJqg9m-IDWInoQtE4wV0rGDZEXZwuU3c,36500
+fsspec/caching.py,sha256=oHVy9zpy4Oqk5f1t3-Q31bbw0tsmfddGGKLJs__OdKA,32790
 fsspec/callbacks.py,sha256=BDIwLzK6rr_0V5ch557fSzsivCElpdqhXr5dZ9Te-EE,9210
 fsspec/compression.py,sha256=jCSUMJu-zSNyrusnHT0wKXgOd1tTJR6vM126i5SR5Zc,4865
 fsspec/config.py,sha256=LF4Zmu1vhJW7Je9Q-cwkRc3xP7Rhyy7Xnwj26Z6sv2g,4279
 fsspec/conftest.py,sha256=fVfx-NLrH_OZS1TIpYNoPzM7efEcMoL62reHOdYeFCA,1245
-fsspec/core.py,sha256=
+fsspec/core.py,sha256=bn-y3Mn9q8Gh3Ng_yAIDfIjyysQ95tuK78RlhlrqTb4,23828
 fsspec/dircache.py,sha256=YzogWJrhEastHU7vWz-cJiJ7sdtLXFXhEpInGKd4EcM,2717
 fsspec/exceptions.py,sha256=pauSLDMxzTJMOjvX1WEUK0cMyFkrFxpWJsyFywav7A8,331
 fsspec/fuse.py,sha256=Q-3NOOyLqBfYa4Db5E19z_ZY36zzYHtIs1mOUasItBQ,10177
 fsspec/generic.py,sha256=AFbo-mHBt5QJV1Aplg5CJuUiiJ4bNQhcKRuwkZJdWac,13761
 fsspec/gui.py,sha256=xBnHL2-r0LVwhDAtnHoPpXts7jd4Z32peawCJiI-7lI,13975
 fsspec/json.py,sha256=65sQ0Y7mTj33u_Y4IId5up4abQ3bAel4E4QzbKMiQSg,3826
-fsspec/mapping.py,sha256=
-fsspec/parquet.py,sha256=
-fsspec/registry.py,sha256=
-fsspec/spec.py,sha256=
+fsspec/mapping.py,sha256=m2ndB_gtRBXYmNJg0Ie1-BVR75TFleHmIQBzC-yWhjU,8343
+fsspec/parquet.py,sha256=6ibAmG527L5JNFS0VO8BDNlxHdA3bVYqdByeiFgpUVM,19448
+fsspec/registry.py,sha256=A2r3PiZd17192sGHLwWNFbK8RFiDA7gSbfboIJ07wTY,11471
+fsspec/spec.py,sha256=d_NY5YVuwV7YCRduKkaR_z8B9GUna4-H9mOinymEMFY,75971
 fsspec/transaction.py,sha256=xliRG6U2Zf3khG4xcw9WiB-yAoqJSHEGK_VjHOdtgo0,2398
-fsspec/utils.py,sha256=
+fsspec/utils.py,sha256=A11t25RnpiQ30RO6xeR0Qqlu3fGj8bnc40jg08tlYSI,22980
 fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fsspec/implementations/arrow.py,sha256=721Dikne_lV_0tlgk9jyKmHL6W-5MT0h2LKGvOYQTPI,8623
+fsspec/implementations/asyn_wrapper.py,sha256=cXfSkF2AaboInIIA_6jmB796RP_BXd8u08loPAHQsxQ,2864
 fsspec/implementations/cache_mapper.py,sha256=W4wlxyPxZbSp9ItJ0pYRVBMh6bw9eFypgP6kUYuuiI4,2421
 fsspec/implementations/cache_metadata.py,sha256=pcOJYcBQY5OaC7Yhw0F3wjg08QLYApGmoISCrbs59ks,8511
-fsspec/implementations/cached.py,sha256=
+fsspec/implementations/cached.py,sha256=KA6c4jqrGeeg8WNPLsh8FkL3KeRAQtGLzKw18vSF1CI,32820
 fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
 fsspec/implementations/data.py,sha256=LDLczxRh8h7x39Zjrd-GgzdQHr78yYxDlrv2C9Uxb5E,1658
 fsspec/implementations/dbfs.py,sha256=a0eNjLxyfFK7pbEa52U8K-PhNHukzdGVx1eLcVniaXY,15092
 fsspec/implementations/dirfs.py,sha256=ymakitNNQ07tW76EShyw3rC9RvIDHl4gtuOhE_h1vUg,12032
-fsspec/implementations/ftp.py,sha256=
+fsspec/implementations/ftp.py,sha256=sorsczLp_2J3ukONsbZY-11sRZP6H5a3V7XXf6o6ip0,11936
 fsspec/implementations/git.py,sha256=4SElW9U5d3k3_ITlvUAx59Yk7XLNRTqkGa2C3hCUkWM,3754
 fsspec/implementations/github.py,sha256=eAn1kJ7VeWR6gVoVRLBYclF_rQDXSJU-xzMXpvPQWqs,8002
-fsspec/implementations/http.py,sha256=
+fsspec/implementations/http.py,sha256=d7G7_pRTMHouKE42lvRNHqB5u4XQi0dm4wb-6U_IiF4,29361
 fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ibN4Pg,3811
 fsspec/implementations/libarchive.py,sha256=5_I2DiLXwQ1JC8x-K7jXu-tBwhO9dj7tFLnb0bTnVMQ,7102
-fsspec/implementations/local.py,sha256=
-fsspec/implementations/memory.py,sha256=
-fsspec/implementations/reference.py,sha256=
+fsspec/implementations/local.py,sha256=YvR9b2MndSQIHszAMUkFvN65eWVbIfoGJJjAeS43ZS4,15259
+fsspec/implementations/memory.py,sha256=cLNrK9wk97sl4Tre9uVDXWj6mEHvvVVIgaVgNA5KVIg,10527
+fsspec/implementations/reference.py,sha256=E-XYtnsHpE1e4x2io1ILOioGsWpCLDH1bqYN3QPGUJI,45930
 fsspec/implementations/sftp.py,sha256=fMY9XZcmpjszQ2tCqO_TPaJesaeD_Dv7ptYzgUPGoO0,5631
 fsspec/implementations/smb.py,sha256=5fhu8h06nOLBPh2c48aT7WBRqh9cEcbIwtyu06wTjec,15236
 fsspec/implementations/tar.py,sha256=dam78Tp_CozybNqCY2JYgGBS3Uc9FuJUAT9oB0lolOs,4111
-fsspec/implementations/webhdfs.py,sha256=
+fsspec/implementations/webhdfs.py,sha256=G9wGywj7BkZk4Mu9zXu6HaDlEqX4F8Gw1i4k46CP_-o,16769
 fsspec/implementations/zip.py,sha256=9LBMHPft2OutJl2Ft-r9u_z3GptLkc2n91ur2A3bCbg,6072
-fsspec/tests/abstract/__init__.py,sha256=
+fsspec/tests/abstract/__init__.py,sha256=4xUJrv7gDgc85xAOz1p-V_K1hrsdMWTSa0rviALlJk8,10181
 fsspec/tests/abstract/common.py,sha256=1GQwNo5AONzAnzZj0fWgn8NJPLXALehbsuGxS3FzWVU,4973
 fsspec/tests/abstract/copy.py,sha256=gU5-d97U3RSde35Vp4RxPY4rWwL744HiSrJ8IBOp9-8,19967
 fsspec/tests/abstract/get.py,sha256=vNR4HztvTR7Cj56AMo7_tx7TeYz1Jgr_2Wb8Lv-UiBY,20755
 fsspec/tests/abstract/mv.py,sha256=k8eUEBIrRrGMsBY5OOaDXdGnQUKGwDIfQyduB6YD3Ns,1982
+fsspec/tests/abstract/open.py,sha256=Fi2PBPYLbRqysF8cFm0rwnB41kMdQVYjq8cGyDXp3BU,329
+fsspec/tests/abstract/pipe.py,sha256=LFzIrLCB5GLXf9rzFKJmE8AdG7LQ_h4bJo70r8FLPqM,402
 fsspec/tests/abstract/put.py,sha256=7aih17OKB_IZZh1Mkq1eBDIjobhtMQmI8x-Pw-S_aZk,21201
-fsspec-2024.10.0.dist-info/METADATA,sha256=
-fsspec-2024.10.0.dist-info/WHEEL,sha256=
-fsspec-2024.10.0.dist-info/licenses/LICENSE,sha256=
-fsspec-2024.10.0.dist-info/RECORD,,
+fsspec-2024.12.0.dist-info/METADATA,sha256=bZRSxMVT7oHOUheJQEDMsChBFuby1pTg8WUbnLYATrc,11801
+fsspec-2024.12.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+fsspec-2024.12.0.dist-info/licenses/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
+fsspec-2024.12.0.dist-info/RECORD,,
{fsspec-2024.10.0.dist-info → fsspec-2024.12.0.dist-info}/WHEEL
CHANGED
{fsspec-2024.10.0.dist-info → fsspec-2024.12.0.dist-info}/licenses/LICENSE
File without changes