fsspec 2024.9.0__py3-none-any.whl → 2024.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/_version.py +2 -2
- fsspec/asyn.py +9 -7
- fsspec/caching.py +34 -19
- fsspec/core.py +20 -15
- fsspec/implementations/asyn_wrapper.py +98 -0
- fsspec/implementations/cached.py +1 -1
- fsspec/implementations/dirfs.py +12 -0
- fsspec/implementations/ftp.py +1 -1
- fsspec/implementations/git.py +27 -39
- fsspec/implementations/http.py +14 -30
- fsspec/implementations/local.py +6 -1
- fsspec/implementations/memory.py +15 -6
- fsspec/implementations/reference.py +58 -15
- fsspec/implementations/webhdfs.py +2 -1
- fsspec/implementations/zip.py +2 -1
- fsspec/mapping.py +1 -1
- fsspec/parquet.py +1 -1
- fsspec/registry.py +4 -0
- fsspec/spec.py +209 -35
- fsspec/tests/abstract/__init__.py +3 -1
- fsspec/tests/abstract/open.py +11 -0
- fsspec/tests/abstract/pipe.py +11 -0
- fsspec/utils.py +4 -2
- {fsspec-2024.9.0.dist-info → fsspec-2024.12.0.dist-info}/METADATA +3 -2
- {fsspec-2024.9.0.dist-info → fsspec-2024.12.0.dist-info}/RECORD +27 -24
- {fsspec-2024.9.0.dist-info → fsspec-2024.12.0.dist-info}/WHEEL +1 -1
- {fsspec-2024.9.0.dist-info → fsspec-2024.12.0.dist-info}/licenses/LICENSE +0 -0
fsspec/_version.py
CHANGED
@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE

-__version__ = version = '2024.9.0'
-__version_tuple__ = version_tuple = (2024, 9, 0)
+__version__ = version = '2024.12.0'
+__version_tuple__ = version_tuple = (2024, 12, 0)
fsspec/asyn.py
CHANGED
@@ -344,6 +344,10 @@ class AsyncFileSystem(AbstractFileSystem):
     async def _cp_file(self, path1, path2, **kwargs):
         raise NotImplementedError

+    async def _mv_file(self, path1, path2):
+        await self._cp_file(path1, path2)
+        await self._rm_file(path1)
+
     async def _copy(
         self,
         path1,
@@ -404,7 +408,7 @@ class AsyncFileSystem(AbstractFileSystem):
                 continue
             raise ex

-    async def _pipe_file(self, path, value, **kwargs):
+    async def _pipe_file(self, path, value, mode="overwrite", **kwargs):
         raise NotImplementedError

     async def _pipe(self, path, value=None, batch_size=None, **kwargs):
@@ -513,7 +517,7 @@ class AsyncFileSystem(AbstractFileSystem):
             coros, batch_size=batch_size, nofiles=True, return_exceptions=True
         )

-    async def _put_file(self, lpath, rpath, **kwargs):
+    async def _put_file(self, lpath, rpath, mode="overwrite", **kwargs):
         raise NotImplementedError

     async def _put(
@@ -812,11 +816,9 @@ class AsyncFileSystem(AbstractFileSystem):
             p: info
             for p, info in sorted(allpaths.items())
             if pattern.match(
-                (
-                    p + "/"
-                    if append_slash_to_dirname and info["type"] == "directory"
-                    else p
-                )
+                p + "/"
+                if append_slash_to_dirname and info["type"] == "directory"
+                else p
             )
         }
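Two behavioral notes on these hunks: `_mv_file` gains a default copy-then-delete implementation, so an async backend that implements `_cp_file` and `_rm_file` gets moves for free; and `_pipe_file`/`_put_file` grow a `mode` parameter, where "overwrite" is the default and "create" requests an exclusive write (the memory.py and http.py sections below show both values in use). A minimal sketch of a subclass honoring the new parameter — `DictFS` and its toy store are hypothetical, not part of the package:

from fsspec.asyn import AsyncFileSystem


class DictFS(AsyncFileSystem):  # hypothetical subclass, for illustration only
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._store = {}  # toy in-memory backing store

    async def _pipe_file(self, path, value, mode="overwrite", **kwargs):
        # mode="create" means exclusive write: refuse to clobber existing data
        if mode == "create" and path in self._store:
            raise FileExistsError(path)
        self._store[path] = value

    async def _rm_file(self, path, **kwargs):
        del self._store[path]

    async def _cp_file(self, path1, path2, **kwargs):
        # with _cp_file and _rm_file defined, the inherited _mv_file works
        self._store[path2] = self._store[path1]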
fsspec/caching.py
CHANGED
@@ -8,6 +8,8 @@ import os
 import threading
 import warnings
 from concurrent.futures import Future, ThreadPoolExecutor
+from itertools import groupby
+from operator import itemgetter
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -85,12 +87,7 @@ class BaseCache:
         if self.hit_count == 0 and self.miss_count == 0:
             # a cache that does nothing, this is for logs only
             return ""
-        return " , %s: %d hits, %d misses, %d total requested bytes" % (
-            self.name,
-            self.hit_count,
-            self.miss_count,
-            self.total_requested_bytes,
-        )
+        return f" , {self.name}: {self.hit_count} hits, {self.miss_count} misses, {self.total_requested_bytes} total requested bytes"

     def __repr__(self) -> str:
         # TODO: use rich for better formatting
@@ -161,21 +158,39 @@ class MMapCache(BaseCache):
             return b""
         start_block = start // self.blocksize
         end_block = end // self.blocksize
-        need = [i for i in range(start_block, end_block + 1) if i not in self.blocks]
-        hits = [i for i in range(start_block, end_block + 1) if i in self.blocks]
-        self.miss_count += len(need)
-        self.hit_count += len(hits)
-        while need:
-            # TODO: not a for loop so we can consolidate blocks later to
-            # make fewer fetch calls; this could be parallel
-            i = need.pop(0)
-
-            sstart = i * self.blocksize
-            send = min(sstart + self.blocksize, self.size)
+        block_range = range(start_block, end_block + 1)
+        # Determine which blocks need to be fetched. This sequence is sorted by construction.
+        need = (i for i in block_range if i not in self.blocks)
+        # Count the number of blocks already cached
+        self.hit_count += sum(1 for i in block_range if i in self.blocks)
+
+        # Consolidate needed blocks.
+        # Algorithm adapted from Python 2.x itertools documentation.
+        # We are grouping an enumerated sequence of blocks. By comparing when the difference
+        # between an ascending range (provided by enumerate) and the needed block numbers
+        # we can detect when the block number skips values. The key computes this difference.
+        # Whenever the difference changes, we know that we have previously cached block(s),
+        # and a new group is started. In other words, this algorithm neatly groups
+        # runs of consecutive block numbers so they can be fetched together.
+        for _, _blocks in groupby(enumerate(need), key=lambda x: x[0] - x[1]):
+            # Extract the blocks from the enumerated sequence
+            _blocks = tuple(map(itemgetter(1), _blocks))
+            # Compute start of first block
+            sstart = _blocks[0] * self.blocksize
+            # Compute the end of the last block. Last block may not be full size.
+            send = min(_blocks[-1] * self.blocksize + self.blocksize, self.size)

+            # Fetch bytes (could be multiple consecutive blocks)
             self.total_requested_bytes += send - sstart
-            logger.debug(f"MMap get block #{i} ({sstart}-{send})")
+            logger.debug(
+                f"MMap get blocks {_blocks[0]}-{_blocks[-1]} ({sstart}-{send})"
+            )
             self.cache[sstart:send] = self.fetcher(sstart, send)
-            self.blocks.add(i)
+
+            # Update set of cached blocks
+            self.blocks.update(_blocks)
+            # Update cache statistics with number of blocks we had to cache
+            self.miss_count += len(_blocks)

         return self.cache[start:end]
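The consolidation idiom reads more easily outside the diff; here is the same enumerate/groupby trick as a standalone sketch (not taken from the package):

from itertools import groupby
from operator import itemgetter

# Block numbers still missing from the cache, in ascending order.
need = [2, 3, 4, 9, 10, 15]

# Within a run of consecutive integers, index - value is constant, so
# grouping on that difference splits `need` into contiguous runs.
runs = [
    tuple(map(itemgetter(1), grp))
    for _, grp in groupby(enumerate(need), key=lambda x: x[0] - x[1])
]
print(runs)  # [(2, 3, 4), (9, 10), (15,)]

Each run then becomes a single `fetcher(sstart, send)` call, where the old code issued one fetch per block.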
fsspec/core.py
CHANGED
@@ -329,12 +329,19 @@ def open_files(


 def _un_chain(path, kwargs):
-    x = re.compile(".*[^a-z]+.*")  # test for non protocol-like single word
-    bits = (
-        [p if "://" in p or x.match(p) else p + "://" for p in path.split("::")]
-        if "::" in path
-        else [path]
-    )
+    # Avoid a circular import
+    from fsspec.implementations.cached import CachingFileSystem
+
+    if "::" in path:
+        x = re.compile(".*[^a-z]+.*")  # test for non protocol-like single word
+        bits = []
+        for p in path.split("::"):
+            if "://" in p or x.match(p):
+                bits.append(p)
+            else:
+                bits.append(p + "://")
+    else:
+        bits = [path]
     # [[url, protocol, kwargs], ...]
     out = []
     previous_bit = None
@@ -346,12 +353,12 @@ def _un_chain(path, kwargs):
         kws = kwargs.pop(protocol, {})
         if bit is bits[0]:
             kws.update(kwargs)
-        kw = dict(**extra_kwargs, **kws)
+        kw = dict(
+            **{k: v for k, v in extra_kwargs.items() if k not in kws or v != kws[k]},
+            **kws,
+        )
         bit = cls._strip_protocol(bit)
-        if (
-            protocol in {"blockcache", "filecache", "simplecache"}
-            and "target_protocol" not in kw
-        ):
+        if "target_protocol" not in kw and issubclass(cls, CachingFileSystem):
             bit = previous_bit
         out.append((bit, protocol, kw))
         previous_bit = bit
@@ -578,7 +585,7 @@ def expand_paths_if_needed(paths, mode, num, fs, name_function):
     paths = list(paths)

     if "w" in mode:  # read mode
-        if sum([1 for p in paths if "*" in p]) > 1:
+        if sum(1 for p in paths if "*" in p) > 1:
             raise ValueError(
                 "When writing data, only one filename mask can be specified."
             )
@@ -673,9 +680,7 @@ def get_fs_token_paths(
     elif not isinstance(paths, list):
         paths = list(paths)
     else:
-        if "w" in mode and expand:
-            paths = _expand_paths(paths, name_function, num)
-        elif "x" in mode and expand:
+        if ("w" in mode or "x" in mode) and expand:
             paths = _expand_paths(paths, name_function, num)
         elif "*" in paths:
            paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)]
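For background, `_un_chain` parses chained URLs, in which `::` separates layers and a caching layer wraps the layer to its right. A hedged example (the URL is hypothetical; `filecache` is one of the built-in `CachingFileSystem` subclasses the new `issubclass` test matches):

import fsspec

# "filecache" wraps the HTTP layer: the remote file is downloaded once to
# local disk and subsequent opens are served from that cache.
url = "filecache::https://example.com/data.csv"  # hypothetical URL
with fsspec.open(url, "rt") as f:
    header = f.readline()

Before this change, only the literal protocol names blockcache, filecache, and simplecache received the previous URL fragment as their target; any third-party subclass of `CachingFileSystem` now gets the same treatment.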
fsspec/implementations/asyn_wrapper.py
ADDED
@@ -0,0 +1,98 @@
+import asyncio
+import functools
+import inspect
+
+from fsspec.asyn import AsyncFileSystem
+
+
+def async_wrapper(func, obj=None):
+    """
+    Wraps a synchronous function to make it awaitable.
+
+    Parameters
+    ----------
+    func : callable
+        The synchronous function to wrap.
+    obj : object, optional
+        The instance to bind the function to, if applicable.
+
+    Returns
+    -------
+    coroutine
+        An awaitable version of the function.
+    """
+
+    @functools.wraps(func)
+    async def wrapper(*args, **kwargs):
+        return await asyncio.to_thread(func, *args, **kwargs)
+
+    return wrapper
+
+
+class AsyncFileSystemWrapper(AsyncFileSystem):
+    """
+    A wrapper class to convert a synchronous filesystem into an asynchronous one.
+
+    This class takes an existing synchronous filesystem implementation and wraps all
+    its methods to provide an asynchronous interface.
+
+    Parameters
+    ----------
+    sync_fs : AbstractFileSystem
+        The synchronous filesystem instance to wrap.
+    """
+
+    def __init__(self, sync_fs, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.asynchronous = True
+        self.sync_fs = sync_fs
+        self.protocol = self.sync_fs.protocol
+        self._wrap_all_sync_methods()
+
+    @property
+    def fsid(self):
+        return f"async_{self.sync_fs.fsid}"
+
+    def _wrap_all_sync_methods(self):
+        """
+        Wrap all synchronous methods of the underlying filesystem with asynchronous versions.
+        """
+        for method_name in dir(self.sync_fs):
+            if method_name.startswith("_"):
+                continue
+
+            attr = inspect.getattr_static(self.sync_fs, method_name)
+            if isinstance(attr, property):
+                continue
+
+            method = getattr(self.sync_fs, method_name)
+            if callable(method) and not asyncio.iscoroutinefunction(method):
+                async_method = async_wrapper(method, obj=self)
+                setattr(self, f"_{method_name}", async_method)
+
+    @classmethod
+    def wrap_class(cls, sync_fs_class):
+        """
+        Create a new class that can be used to instantiate an AsyncFileSystemWrapper
+        with lazy instantiation of the underlying synchronous filesystem.
+
+        Parameters
+        ----------
+        sync_fs_class : type
+            The class of the synchronous filesystem to wrap.
+
+        Returns
+        -------
+        type
+            A new class that wraps the provided synchronous filesystem class.
+        """
+
+        class GeneratedAsyncFileSystemWrapper(cls):
+            def __init__(self, *args, **kwargs):
+                sync_fs = sync_fs_class(*args, **kwargs)
+                super().__init__(sync_fs)
+
+        GeneratedAsyncFileSystemWrapper.__name__ = (
+            f"Async{sync_fs_class.__name__}Wrapper"
+        )
+        return GeneratedAsyncFileSystemWrapper
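A usage sketch for the new wrapper (the scratch file exists only for the demo; note that `asyncio.to_thread`, which the wrapper relies on, requires Python 3.9+):

import asyncio
import os
import tempfile

from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper
from fsspec.implementations.local import LocalFileSystem


async def main():
    # create a scratch file to read back through the wrapper
    path = os.path.join(tempfile.mkdtemp(), "demo.txt")
    with open(path, "wb") as f:
        f.write(b"hello")

    afs = AsyncFileSystemWrapper(LocalFileSystem())
    # the sync cat_file method is exposed as async _cat_file,
    # executed in a worker thread via asyncio.to_thread
    print(await afs._cat_file(path))  # b"hello"


asyncio.run(main())

`AsyncFileSystemWrapper.wrap_class(LocalFileSystem)` instead returns a class (named `AsyncLocalFileSystemWrapper`) that builds the synchronous filesystem lazily from the constructor arguments.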
fsspec/implementations/cached.py
CHANGED
@@ -612,7 +612,7 @@ class WholeFileCacheFileSystem(CachingFileSystem):
         **kwargs,
     ):
         paths = self.expand_path(
-            path, recursive=recursive, maxdepth=kwargs.get("maxdepth"
+            path, recursive=recursive, maxdepth=kwargs.get("maxdepth")
         )
         getpaths = []
         storepaths = []
fsspec/implementations/dirfs.py
CHANGED
fsspec/implementations/ftp.py
CHANGED
fsspec/implementations/git.py
CHANGED
@@ -55,6 +55,8 @@ class GitFileSystem(AbstractFileSystem):
         tree = comm.tree
         for part in parts:
             if part and isinstance(tree, pygit2.Tree):
+                if part not in tree:
+                    raise FileNotFoundError(path)
                 tree = tree[part]
         return tree

@@ -69,46 +71,32 @@ class GitFileSystem(AbstractFileSystem):
             out["ref"], path = path.split("@", 1)
         return out

+    @staticmethod
+    def _object_to_info(obj, path=None):
+        # obj.name and obj.filemode are None for the root tree!
+        is_dir = isinstance(obj, pygit2.Tree)
+        return {
+            "type": "directory" if is_dir else "file",
+            "name": (
+                "/".join([path, obj.name or ""]).lstrip("/") if path else obj.name
+            ),
+            "hex": str(obj.id),
+            "mode": "100644" if obj.filemode is None else f"{obj.filemode:o}",
+            "size": 0 if is_dir else obj.size,
+        }
+
     def ls(self, path, detail=True, ref=None, **kwargs):
-        path = self._strip_protocol(path)
-        tree = self._path_to_object(path, ref)
-        if isinstance(tree, pygit2.Tree):
-            out = []
-            for obj in tree:
-                if isinstance(obj, pygit2.Tree):
-                    out.append(
-                        {
-                            "type": "directory",
-                            "name": "/".join([path, obj.name]).lstrip("/"),
-                            "hex": obj.hex,
-                            "mode": f"{obj.filemode:o}",
-                            "size": 0,
-                        }
-                    )
-                else:
-                    out.append(
-                        {
-                            "type": "file",
-                            "name": "/".join([path, obj.name]).lstrip("/"),
-                            "hex": obj.hex,
-                            "mode": f"{obj.filemode:o}",
-                            "size": obj.size,
-                        }
-                    )
-        else:
-            obj = tree
-            out = [
-                {
-                    "type": "file",
-                    "name": obj.name,
-                    "hex": obj.hex,
-                    "mode": f"{obj.filemode:o}",
-                    "size": obj.size,
-                }
-            ]
-        if detail:
-            return out
-        return [o["name"] for o in out]
+        tree = self._path_to_object(self._strip_protocol(path), ref)
+        return [
+            GitFileSystem._object_to_info(obj, path)
+            if detail
+            else GitFileSystem._object_to_info(obj, path)["name"]
+            for obj in (tree if isinstance(tree, pygit2.Tree) else [tree])
+        ]
+
+    def info(self, path, ref=None, **kwargs):
+        tree = self._path_to_object(self._strip_protocol(path), ref)
+        return GitFileSystem._object_to_info(tree, path)

     def ukey(self, path, ref=None):
         return self.info(path, ref=ref)["hex"]
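With the new membership check in `_path_to_object`, a missing path now surfaces as `FileNotFoundError` instead of the `KeyError` pygit2 raises on tree lookup. A usage sketch (requires pygit2; the repository path is hypothetical):

import fsspec

fs = fsspec.filesystem("git", path="/path/to/repo")  # hypothetical repo path
try:
    fs.info("no/such/file")
except FileNotFoundError:
    print("missing paths now raise FileNotFoundError")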
fsspec/implementations/http.py
CHANGED
@@ -273,8 +273,12 @@ class HTTPFileSystem(AsyncFileSystem):
         chunk_size=5 * 2**20,
         callback=DEFAULT_CALLBACK,
         method="post",
+        mode="overwrite",
         **kwargs,
     ):
+        if mode != "overwrite":
+            raise NotImplementedError("Exclusive write")
+
         async def gen_chunks():
             # Support passing arbitrary file-like objects
             # and use them instead of streams.
@@ -358,9 +362,10 @@ class HTTPFileSystem(AsyncFileSystem):
         kw = self.kwargs.copy()
         kw["asynchronous"] = self.asynchronous
         kw.update(kwargs)
-        size = size or self.info(path, **kwargs)["size"]
+        info = {}
+        size = size or info.update(self.info(path, **kwargs)) or info["size"]
         session = sync(self.loop, self.set_session)
-        if block_size and size:
+        if block_size and size and info.get("partial", True):
             return HTTPFile(
                 self,
                 path,
@@ -520,9 +525,9 @@ class HTTPFileSystem(AsyncFileSystem):

 class HTTPFile(AbstractBufferedFile):
     """
-    A file-like object pointing to a remove file over HTTP(S)
+    A file-like object pointing to a remote HTTP(S) resource

-    Supports only reading, with read-ahead of a predermined block-size
+    Supports only reading, with read-ahead of a predetermined block-size.

     In the case that the server does not supply the filesize, only reading of
     the complete file in one go is supported.
@@ -691,25 +696,6 @@ class HTTPFile(AbstractBufferedFile):

     _fetch_range = sync_wrapper(async_fetch_range)

-    def __reduce__(self):
-        return (
-            reopen,
-            (
-                self.fs,
-                self.url,
-                self.mode,
-                self.blocksize,
-                self.cache.name if self.cache else "none",
-                self.size,
-            ),
-        )
-
-
-def reopen(fs, url, mode, blocksize, cache_type, size=None):
-    return fs.open(
-        url, mode=mode, block_size=blocksize, cache_type=cache_type, size=size
-    )
-

 magic_check = re.compile("([*[])")

@@ -759,9 +745,6 @@ class HTTPStreamFile(AbstractBufferedFile):
         asyncio.run_coroutine_threadsafe(self._close(), self.loop)
         super().close()

-    def __reduce__(self):
-        return reopen, (self.fs, self.url, self.mode, self.blocksize, self.cache.name)
-

 class AsyncStreamFile(AbstractAsyncStreamedFile):
     def __init__(
@@ -835,10 +818,6 @@ async def _file_info(url, session, size_policy="head", **kwargs):
     async with r:
         r.raise_for_status()

-        # TODO:
-        # recognise lack of 'Accept-Ranges',
-        # or 'Accept-Ranges': 'none' (not 'bytes')
-        # to mean streaming only, no random access => return None
         if "Content-Length" in r.headers:
             # Some servers may choose to ignore Accept-Encoding and return
             # compressed content, in which case the returned size is unreliable.
@@ -853,6 +832,11 @@ async def _file_info(url, session, size_policy="head", **kwargs):
         if "Content-Type" in r.headers:
             info["mimetype"] = r.headers["Content-Type"].partition(";")[0]

+        if r.headers.get("Accept-Ranges") == "none":
+            # Some servers may explicitly discourage partial content requests, but
+            # the lack of "Accept-Ranges" does not always indicate they would fail
+            info["partial"] = False
+
         info["url"] = str(r.url)

         for checksum_field in ["ETag", "Content-MD5", "Digest"]:
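The two http.py changes work together through a new `partial` key: `_file_info` records an explicit `Accept-Ranges: none` response header, and `_open` then refuses to build a random-access `HTTPFile`, falling back to a streaming file. A standalone sketch of that decision, with a plain dict standing in for the aiohttp response headers:

# Example header values; a real response would come from aiohttp.
headers = {"Content-Length": "1048576", "Accept-Ranges": "none"}

info = {"size": int(headers["Content-Length"])}
if headers.get("Accept-Ranges") == "none":
    # Only an explicit "none" disables range requests; a missing
    # Accept-Ranges header is still treated as potentially seekable.
    info["partial"] = False

block_size = 5 * 2**20
use_random_access = bool(block_size and info["size"] and info.get("partial", True))
print(use_random_access)  # False -> open() returns a streaming file instead

Separately, the `__reduce__` methods and the module-level `reopen` helper are removed, so HTTP file objects no longer pickle by reopening the URL.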
fsspec/implementations/local.py
CHANGED
@@ -60,7 +60,12 @@ class LocalFileSystem(AbstractFileSystem):
         info = self.info(path)
         if info["type"] == "directory":
             with os.scandir(path) as it:
-                infos = [self.info(f) for f in it]
+                infos = []
+                for f in it:
+                    try:
+                        infos.append(self.info(f))
+                    except FileNotFoundError:
+                        pass
         else:
             infos = [info]
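The list comprehension was replaced because a directory entry can be deleted between `os.scandir` and the per-entry `info` call. The race in miniature, as a standalone stdlib-only illustration:

import os

infos = []
with os.scandir(".") as it:
    for entry in it:
        try:
            infos.append(entry.stat())
        except FileNotFoundError:
            pass  # entry vanished between listing and stat; skip it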
fsspec/implementations/memory.py
CHANGED
@@ -126,12 +126,13 @@ class MemoryFileSystem(AbstractFileSystem):
         if not exist_ok:
             raise

-    def pipe_file(self, path, value, **kwargs):
+    def pipe_file(self, path, value, mode="overwrite", **kwargs):
         """Set the bytes of given file

         Avoids copies of the data if possible
         """
-        self.open(path, mode="wb", data=value)
+        mode = "xb" if mode == "create" else "wb"
+        self.open(path, mode=mode, data=value)

     def rmdir(self, path):
         path = self._strip_protocol(path)
@@ -178,6 +179,8 @@ class MemoryFileSystem(AbstractFileSystem):
         **kwargs,
     ):
         path = self._strip_protocol(path)
+        if "x" in mode and self.exists(path):
+            raise FileExistsError
         if path in self.pseudo_dirs:
             raise IsADirectoryError(path)
         parent = path
@@ -197,7 +200,9 @@ class MemoryFileSystem(AbstractFileSystem):
                 return f
             else:
                 raise FileNotFoundError(path)
-        elif mode == "wb":
+        elif mode in {"wb", "xb"}:
+            if mode == "xb" and self.exists(path):
+                raise FileExistsError
             m = MemoryFile(self, path, kwargs.get("data"))
             if not self._intrans:
                 m.commit()
@@ -248,6 +253,10 @@ class MemoryFileSystem(AbstractFileSystem):
         except KeyError as e:
             raise FileNotFoundError(path) from e

+    def isfile(self, path):
+        path = self._strip_protocol(path)
+        return path in self.store
+
     def rm(self, path, recursive=False, maxdepth=None):
         if isinstance(path, str):
             path = self._strip_protocol(path)
@@ -255,14 +264,14 @@ class MemoryFileSystem(AbstractFileSystem):
             path = [self._strip_protocol(p) for p in path]
         paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
         for p in reversed(paths):
+            if self.isfile(p):
+                self.rm_file(p)
             # If the expanded path doesn't exist, it is only because the expanded
             # path was a directory that does not exist in self.pseudo_dirs. This
             # is possible if you directly create files without making the
             # directories first.
-            if not self.exists(p):
+            elif not self.exists(p):
                 continue
-            if self.isfile(p):
-                self.rm_file(p)
             else:
                 self.rmdir(p)
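Taken together with the `_open` changes, the memory filesystem now supports exclusive creation through `pipe_file`. A quick demonstration using only the APIs shown in this diff:

import fsspec

fs = fsspec.filesystem("memory")
fs.pipe_file("/a", b"first", mode="create")  # succeeds: /a did not exist
fs.pipe_file("/a", b"second")                # default mode="overwrite" succeeds
try:
    fs.pipe_file("/a", b"third", mode="create")  # exclusive write ("xb")
except FileExistsError:
    print("create mode refuses to clobber /a")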