fsspec-2023.6.0-py3-none-any.whl → fsspec-2023.9.1-py3-none-any.whl
This diff shows the content of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
- fsspec/_version.py +3 -3
- fsspec/asyn.py +154 -92
- fsspec/caching.py +1 -1
- fsspec/compression.py +7 -2
- fsspec/core.py +16 -8
- fsspec/generic.py +111 -17
- fsspec/gui.py +4 -2
- fsspec/implementations/cache_mapper.py +80 -0
- fsspec/implementations/cache_metadata.py +232 -0
- fsspec/implementations/cached.py +74 -157
- fsspec/implementations/dirfs.py +3 -1
- fsspec/implementations/http.py +36 -19
- fsspec/implementations/local.py +4 -21
- fsspec/implementations/memory.py +8 -9
- fsspec/implementations/reference.py +8 -8
- fsspec/implementations/sftp.py +6 -2
- fsspec/implementations/smb.py +39 -23
- fsspec/mapping.py +8 -0
- fsspec/registry.py +22 -0
- fsspec/spec.py +164 -96
- fsspec/tests/abstract/__init__.py +147 -0
- fsspec/tests/abstract/common.py +175 -0
- fsspec/tests/abstract/copy.py +250 -56
- fsspec/tests/abstract/get.py +248 -38
- fsspec/tests/abstract/put.py +246 -66
- fsspec/utils.py +25 -8
- {fsspec-2023.6.0.dist-info → fsspec-2023.9.1.dist-info}/METADATA +1 -1
- fsspec-2023.9.1.dist-info/RECORD +54 -0
- fsspec-2023.6.0.dist-info/RECORD +0 -51
- {fsspec-2023.6.0.dist-info → fsspec-2023.9.1.dist-info}/LICENSE +0 -0
- {fsspec-2023.6.0.dist-info → fsspec-2023.9.1.dist-info}/WHEEL +0 -0
- {fsspec-2023.6.0.dist-info → fsspec-2023.9.1.dist-info}/top_level.txt +0 -0
fsspec/gui.py
CHANGED
@@ -257,12 +257,14 @@ class FileSelector(SigSlot):
             width_policy="max",
         )
         self.protocol = pn.widgets.Select(
-            options=
+            options=sorted(known_implementations),
             value=self.init_protocol,
             name="protocol",
             align="center",
         )
-        self.kwargs = pn.widgets.TextInput(
+        self.kwargs = pn.widgets.TextInput(
+            name="kwargs", value=self.init_kwargs, align="center"
+        )
         self.go = pn.widgets.Button(name="⇨", align="end", width=45)
         self.main = SingleSelect(size=10)
         self.home = pn.widgets.Button(name="🏠", width=40, height=30, align="end")
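
For context on the `options=` change above: fsspec.registry.known_implementations is a plain dict keyed by protocol name, so sorting it yields an alphabetized list of protocol strings for the Select widget. A minimal sketch, not part of the diff:

    from fsspec.registry import known_implementations

    # Iterating (or sorting) a dict yields its keys, so this returns the
    # registered protocol names in alphabetical order, e.g. ["abfs", ..., "zip"].
    protocols = sorted(known_implementations)
    assert all(isinstance(p, str) for p in protocols)
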
fsspec/implementations/cache_mapper.py
ADDED
@@ -0,0 +1,80 @@
+from __future__ import annotations
+
+import abc
+import hashlib
+from typing import TYPE_CHECKING
+
+from fsspec.implementations.local import make_path_posix
+
+if TYPE_CHECKING:
+    from typing import Any
+
+
+class AbstractCacheMapper(abc.ABC):
+    """Abstract super-class for mappers from remote URLs to local cached
+    basenames.
+    """
+
+    @abc.abstractmethod
+    def __call__(self, path: str) -> str:
+        ...
+
+    def __eq__(self, other: Any) -> bool:
+        # Identity only depends on class. When derived classes have attributes
+        # they will need to be included.
+        return isinstance(other, type(self))
+
+    def __hash__(self) -> int:
+        # Identity only depends on class. When derived classes have attributes
+        # they will need to be included.
+        return hash(type(self))
+
+
+class BasenameCacheMapper(AbstractCacheMapper):
+    """Cache mapper that uses the basename of the remote URL and a fixed number
+    of directory levels above this.
+
+    The default is zero directory levels, meaning different paths with the same
+    basename will have the same cached basename.
+    """
+
+    def __init__(self, directory_levels: int = 0):
+        if directory_levels < 0:
+            raise ValueError(
+                "BasenameCacheMapper requires zero or positive directory_levels"
+            )
+        self.directory_levels = directory_levels
+
+        # Separator for directories when encoded as strings.
+        self._separator = "_@_"
+
+    def __call__(self, path: str) -> str:
+        path = make_path_posix(path)
+        prefix, *bits = path.rsplit("/", self.directory_levels + 1)
+        if bits:
+            return self._separator.join(bits)
+        else:
+            return prefix  # No separator found, simple filename
+
+    def __eq__(self, other: Any) -> bool:
+        return super().__eq__(other) and self.directory_levels == other.directory_levels
+
+    def __hash__(self) -> int:
+        return super().__hash__() ^ hash(self.directory_levels)
+
+
+class HashCacheMapper(AbstractCacheMapper):
+    """Cache mapper that uses a hash of the remote URL."""
+
+    def __call__(self, path: str) -> str:
+        return hashlib.sha256(path.encode()).hexdigest()
+
+
+def create_cache_mapper(same_names: bool) -> AbstractCacheMapper:
+    """Factory method to create cache mapper for backward compatibility with
+    ``CachingFileSystem`` constructor using ``same_names`` kwarg.
+    """
+    if same_names:
+        return BasenameCacheMapper()
+    else:
+        return HashCacheMapper()
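
A minimal usage sketch of the new mappers (illustrative only, not from the package; the example paths are made up):

    from fsspec.implementations.cache_mapper import (
        BasenameCacheMapper,
        HashCacheMapper,
        create_cache_mapper,
    )

    # The factory preserves the old same_names semantics of CachingFileSystem.
    assert create_cache_mapper(True) == BasenameCacheMapper()
    assert create_cache_mapper(False) == HashCacheMapper()

    # With one directory level, the parent directory is folded into the cached
    # basename via the "_@_" separator, so a/data.csv and b/data.csv no longer
    # collide on the same cached name.
    mapper = BasenameCacheMapper(directory_levels=1)
    print(mapper("s3://bucket/a/data.csv"))  # a_@_data.csv
    print(mapper("s3://bucket/b/data.csv"))  # b_@_data.csv

    # HashCacheMapper gives an opaque but collision-resistant name.
    print(HashCacheMapper()("s3://bucket/a/data.csv"))  # 64-char sha256 hex digest
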
fsspec/implementations/cache_metadata.py
ADDED
@@ -0,0 +1,232 @@
+from __future__ import annotations
+
+import os
+import pickle
+import time
+from typing import TYPE_CHECKING
+
+from fsspec.utils import atomic_write
+
+try:
+    import ujson as json
+except ImportError:
+    if not TYPE_CHECKING:
+        import json
+
+if TYPE_CHECKING:
+    from typing import Any, Dict, Iterator, Literal
+
+    from typing_extensions import TypeAlias
+
+    from .cached import CachingFileSystem
+
+    Detail: TypeAlias = Dict[str, Any]
+
+
+class CacheMetadata:
+    """Cache metadata.
+
+    All reading and writing of cache metadata is performed by this class;
+    accessing the cached files and blocks is not.
+
+    Metadata is stored in a single file per storage directory in JSON format.
+    For backward compatibility, also reads metadata stored in pickle format
+    which is converted to JSON when next saved.
+    """
+
+    def __init__(self, storage: list[str]):
+        """
+
+        Parameters
+        ----------
+        storage: list[str]
+            Directories containing cached files, must be at least one. Metadata
+            is stored in the last of these directories by convention.
+        """
+        if not storage:
+            raise ValueError("CacheMetadata expects at least one storage location")
+
+        self._storage = storage
+        self.cached_files: list[Detail] = [{}]
+
+        # Private attribute to force saving of metadata in pickle format rather than
+        # JSON for use in tests to confirm can read both pickle and JSON formats.
+        self._force_save_pickle = False
+
+    def _load(self, fn: str) -> Detail:
+        """Low-level function to load metadata from specific file"""
+        try:
+            with open(fn, "r") as f:
+                return json.load(f)
+        except ValueError:
+            with open(fn, "rb") as f:
+                return pickle.load(f)
+
+    def _save(self, metadata_to_save: Detail, fn: str) -> None:
+        """Low-level function to save metadata to specific file"""
+        if self._force_save_pickle:
+            with atomic_write(fn) as f:
+                pickle.dump(metadata_to_save, f)
+        else:
+            with atomic_write(fn, mode="w") as f:
+                json.dump(metadata_to_save, f)
+
+    def _scan_locations(
+        self, writable_only: bool = False
+    ) -> Iterator[tuple[str, str, bool]]:
+        """Yield locations (filenames) where metadata is stored, and whether
+        writable or not.
+
+        Parameters
+        ----------
+        writable_only: bool
+            Set to True to only yield writable locations.
+
+        Returns
+        -------
+        Yields (str, str, bool)
+        """
+        n = len(self._storage)
+        for i, storage in enumerate(self._storage):
+            writable = i == n - 1
+            if writable_only and not writable:
+                continue
+            yield os.path.join(storage, "cache"), storage, writable
+
+    def check_file(
+        self, path: str, cfs: CachingFileSystem | None
+    ) -> Literal[False] | tuple[Detail, str]:
+        """If path is in cache return its details, otherwise return ``False``.
+
+        If the optional CachingFileSystem is specified then it is used to
+        perform extra checks to reject possible matches, such as if they are
+        too old.
+        """
+        for (fn, base, _), cache in zip(self._scan_locations(), self.cached_files):
+            if path not in cache:
+                continue
+            detail = cache[path].copy()
+
+            if cfs is not None:
+                if cfs.check_files and detail["uid"] != cfs.fs.ukey(path):
+                    # Wrong file as determined by hash of file properties
+                    continue
+                if cfs.expiry and time.time() - detail["time"] > cfs.expiry:
+                    # Cached file has expired
+                    continue
+
+            fn = os.path.join(base, detail["fn"])
+            if os.path.exists(fn):
+                return detail, fn
+        return False
+
+    def clear_expired(self, expiry_time: int) -> tuple[list[str], bool]:
+        """Remove expired metadata from the cache.
+
+        Returns names of files corresponding to expired metadata and a boolean
+        flag indicating whether the writable cache is empty. Caller is
+        responsible for deleting the expired files.
+        """
+        expired_files = []
+        for path, detail in self.cached_files[-1].copy().items():
+            if time.time() - detail["time"] > expiry_time:
+                fn = detail.get("fn", "")
+                if not fn:
+                    raise RuntimeError(
+                        f"Cache metadata does not contain 'fn' for {path}"
+                    )
+                fn = os.path.join(self._storage[-1], fn)
+                expired_files.append(fn)
+                self.cached_files[-1].pop(path)
+
+        if self.cached_files[-1]:
+            cache_path = os.path.join(self._storage[-1], "cache")
+            self._save(self.cached_files[-1], cache_path)
+
+        writable_cache_empty = not self.cached_files[-1]
+        return expired_files, writable_cache_empty
+
+    def load(self) -> None:
+        """Load all metadata from disk and store in ``self.cached_files``"""
+        cached_files = []
+        for fn, _, _ in self._scan_locations():
+            if os.path.exists(fn):
+                # TODO: consolidate blocks here
+                loaded_cached_files = self._load(fn)
+                for c in loaded_cached_files.values():
+                    if isinstance(c["blocks"], list):
+                        c["blocks"] = set(c["blocks"])
+                cached_files.append(loaded_cached_files)
+            else:
+                cached_files.append({})
+        self.cached_files = cached_files or [{}]
+
+    def on_close_cached_file(self, f: Any, path: str) -> None:
+        """Perform side-effect actions on closing a cached file.
+
+        The actual closing of the file is the responsibility of the caller.
+        """
+        # File must be writable, so in self.cached_files[-1]
+        c = self.cached_files[-1][path]
+        if c["blocks"] is not True and len(c["blocks"]) * f.blocksize >= f.size:
+            c["blocks"] = True
+
+    def pop_file(self, path: str) -> str | None:
+        """Remove metadata of cached file.
+
+        If path is in the cache, return the filename of the cached file,
+        otherwise return ``None``. Caller is responsible for deleting the
+        cached file.
+        """
+        details = self.check_file(path, None)
+        if not details:
+            return None
+        _, fn = details
+        if fn.startswith(self._storage[-1]):
+            self.cached_files[-1].pop(path)
+            self.save()
+        else:
+            raise PermissionError(
+                "Can only delete cached file in last, writable cache location"
+            )
+        return fn
+
+    def save(self) -> None:
+        """Save metadata to disk"""
+        for (fn, _, writable), cache in zip(self._scan_locations(), self.cached_files):
+            if not writable:
+                continue
+
+            if os.path.exists(fn):
+                cached_files = self._load(fn)
+                for k, c in cached_files.items():
+                    if k in cache:
+                        if c["blocks"] is True or cache[k]["blocks"] is True:
+                            c["blocks"] = True
+                        else:
+                            # self.cached_files[*][*]["blocks"] must continue to
+                            # point to the same set object so that updates
+                            # performed by MMapCache are propagated back to
+                            # self.cached_files.
+                            blocks = cache[k]["blocks"]
+                            blocks.update(c["blocks"])
+                            c["blocks"] = blocks
+                        c["time"] = max(c["time"], cache[k]["time"])
+                        c["uid"] = cache[k]["uid"]
+
+                # Files can be added to cache after it was written once
+                for k, c in cache.items():
+                    if k not in cached_files:
+                        cached_files[k] = c
+            else:
+                cached_files = cache
+            cache = {k: v.copy() for k, v in cached_files.items()}
+            for c in cache.values():
+                if isinstance(c["blocks"], set):
+                    c["blocks"] = list(c["blocks"])
+            self._save(cache, fn)
+            self.cached_files[-1] = cached_files
+
+    def update_file(self, path: str, detail: Detail) -> None:
+        """Update metadata for specific file in memory, do not save"""
+        self.cached_files[-1][path] = detail