fsspec 2023.6.0__py3-none-any.whl → 2023.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fsspec/gui.py CHANGED
@@ -257,12 +257,14 @@ class FileSelector(SigSlot):
             width_policy="max",
         )
         self.protocol = pn.widgets.Select(
-            options=list(sorted(known_implementations)),
+            options=sorted(known_implementations),
             value=self.init_protocol,
             name="protocol",
             align="center",
         )
-        self.kwargs = pn.widgets.TextInput(name="kwargs", value="{}", align="center")
+        self.kwargs = pn.widgets.TextInput(
+            name="kwargs", value=self.init_kwargs, align="center"
+        )
         self.go = pn.widgets.Button(name="⇨", align="end", width=45)
         self.main = SingleSelect(size=10)
         self.home = pn.widgets.Button(name="🏠", width=40, height=30, align="end")
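With this change the "kwargs" text box is seeded from self.init_kwargs rather than a hard-coded "{}". A minimal sketch of how that might be exercised, assuming FileSelector still accepts a JSON-encoded kwargs string as in earlier releases (the bucket URL below is hypothetical):

import panel as pn
from fsspec.gui import FileSelector

pn.extension()

# Hypothetical URL; the kwargs string is forwarded to the selected backend.
sel = FileSelector(url="s3://example-bucket/data", kwargs='{"anon": true}')
sel.panel  # the "kwargs" TextInput now starts out showing '{"anon": true}'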
@@ -0,0 +1,80 @@
+from __future__ import annotations
+
+import abc
+import hashlib
+from typing import TYPE_CHECKING
+
+from fsspec.implementations.local import make_path_posix
+
+if TYPE_CHECKING:
+    from typing import Any
+
+
+class AbstractCacheMapper(abc.ABC):
+    """Abstract super-class for mappers from remote URLs to local cached
+    basenames.
+    """
+
+    @abc.abstractmethod
+    def __call__(self, path: str) -> str:
+        ...
+
+    def __eq__(self, other: Any) -> bool:
+        # Identity only depends on class. When derived classes have attributes
+        # they will need to be included.
+        return isinstance(other, type(self))
+
+    def __hash__(self) -> int:
+        # Identity only depends on class. When derived classes have attributes
+        # they will need to be included.
+        return hash(type(self))
+
+
+class BasenameCacheMapper(AbstractCacheMapper):
+    """Cache mapper that uses the basename of the remote URL and a fixed number
+    of directory levels above this.
+
+    The default is zero directory levels, meaning different paths with the same
+    basename will have the same cached basename.
+    """
+
+    def __init__(self, directory_levels: int = 0):
+        if directory_levels < 0:
+            raise ValueError(
+                "BasenameCacheMapper requires zero or positive directory_levels"
+            )
+        self.directory_levels = directory_levels
+
+        # Separator for directories when encoded as strings.
+        self._separator = "_@_"
+
+    def __call__(self, path: str) -> str:
+        path = make_path_posix(path)
+        prefix, *bits = path.rsplit("/", self.directory_levels + 1)
+        if bits:
+            return self._separator.join(bits)
+        else:
+            return prefix  # No separator found, simple filename
+
+    def __eq__(self, other: Any) -> bool:
+        return super().__eq__(other) and self.directory_levels == other.directory_levels
+
+    def __hash__(self) -> int:
+        return super().__hash__() ^ hash(self.directory_levels)
+
+
+class HashCacheMapper(AbstractCacheMapper):
+    """Cache mapper that uses a hash of the remote URL."""
+
+    def __call__(self, path: str) -> str:
+        return hashlib.sha256(path.encode()).hexdigest()
+
+
+def create_cache_mapper(same_names: bool) -> AbstractCacheMapper:
+    """Factory method to create cache mapper for backward compatibility with
+    ``CachingFileSystem`` constructor using ``same_names`` kwarg.
+    """
+    if same_names:
+        return BasenameCacheMapper()
+    else:
+        return HashCacheMapper()
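Taken together, these mappers translate a remote URL into a local cached basename. A short usage sketch, assuming the module above is importable as fsspec.implementations.cache_mapper (the file path is not shown in this diff) and running on a POSIX system:

from fsspec.implementations.cache_mapper import (
    BasenameCacheMapper,
    HashCacheMapper,
    create_cache_mapper,
)

# Backwards-compatible factory: same_names=True keeps readable basenames,
# False hashes the whole URL.
assert isinstance(create_cache_mapper(True), BasenameCacheMapper)
assert isinstance(create_cache_mapper(False), HashCacheMapper)

print(BasenameCacheMapper()("s3://bucket/a/file.csv"))                    # file.csv
print(BasenameCacheMapper(directory_levels=1)("s3://bucket/a/file.csv"))  # a_@_file.csv
print(HashCacheMapper()("s3://bucket/a/file.csv"))                        # 64-char sha256 hex digest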
@@ -0,0 +1,232 @@
+from __future__ import annotations
+
+import os
+import pickle
+import time
+from typing import TYPE_CHECKING
+
+from fsspec.utils import atomic_write
+
+try:
+    import ujson as json
+except ImportError:
+    if not TYPE_CHECKING:
+        import json
+
+if TYPE_CHECKING:
+    from typing import Any, Dict, Iterator, Literal
+
+    from typing_extensions import TypeAlias
+
+    from .cached import CachingFileSystem
+
+    Detail: TypeAlias = Dict[str, Any]
+
+
+class CacheMetadata:
+    """Cache metadata.
+
+    All reading and writing of cache metadata is performed by this class,
+    accessing the cached files and blocks is not.
+
+    Metadata is stored in a single file per storage directory in JSON format.
+    For backward compatibility, also reads metadata stored in pickle format
+    which is converted to JSON when next saved.
+    """
+
+    def __init__(self, storage: list[str]):
+        """
+
+        Parameters
+        ----------
+        storage: list[str]
+            Directories containing cached files, must be at least one. Metadata
+            is stored in the last of these directories by convention.
+        """
+        if not storage:
+            raise ValueError("CacheMetadata expects at least one storage location")
+
+        self._storage = storage
+        self.cached_files: list[Detail] = [{}]
+
+        # Private attribute to force saving of metadata in pickle format rather than
+        # JSON for use in tests to confirm can read both pickle and JSON formats.
+        self._force_save_pickle = False
+
+    def _load(self, fn: str) -> Detail:
+        """Low-level function to load metadata from specific file"""
+        try:
+            with open(fn, "r") as f:
+                return json.load(f)
+        except ValueError:
+            with open(fn, "rb") as f:
+                return pickle.load(f)
+
+    def _save(self, metadata_to_save: Detail, fn: str) -> None:
+        """Low-level function to save metadata to specific file"""
+        if self._force_save_pickle:
+            with atomic_write(fn) as f:
+                pickle.dump(metadata_to_save, f)
+        else:
+            with atomic_write(fn, mode="w") as f:
+                json.dump(metadata_to_save, f)
+
+    def _scan_locations(
+        self, writable_only: bool = False
+    ) -> Iterator[tuple[str, str, bool]]:
+        """Yield locations (filenames) where metadata is stored, and whether
+        writable or not.
+
+        Parameters
+        ----------
+        writable_only: bool
+            Set to True to only yield writable locations.
+
+        Returns
+        -------
+        Yields (str, str, bool)
+        """
+        n = len(self._storage)
+        for i, storage in enumerate(self._storage):
+            writable = i == n - 1
+            if writable_only and not writable:
+                continue
+            yield os.path.join(storage, "cache"), storage, writable
+
+    def check_file(
+        self, path: str, cfs: CachingFileSystem | None
+    ) -> Literal[False] | tuple[Detail, str]:
+        """If path is in cache return its details, otherwise return ``False``.
+
+        If the optional CachingFileSystem is specified then it is used to
+        perform extra checks to reject possible matches, such as if they are
+        too old.
+        """
+        for (fn, base, _), cache in zip(self._scan_locations(), self.cached_files):
+            if path not in cache:
+                continue
+            detail = cache[path].copy()
+
+            if cfs is not None:
+                if cfs.check_files and detail["uid"] != cfs.fs.ukey(path):
+                    # Wrong file as determined by hash of file properties
+                    continue
+                if cfs.expiry and time.time() - detail["time"] > cfs.expiry:
+                    # Cached file has expired
+                    continue
+
+            fn = os.path.join(base, detail["fn"])
+            if os.path.exists(fn):
+                return detail, fn
+        return False
+
+    def clear_expired(self, expiry_time: int) -> tuple[list[str], bool]:
+        """Remove expired metadata from the cache.
+
+        Returns names of files corresponding to expired metadata and a boolean
+        flag indicating whether the writable cache is empty. Caller is
+        responsible for deleting the expired files.
+        """
+        expired_files = []
+        for path, detail in self.cached_files[-1].copy().items():
+            if time.time() - detail["time"] > expiry_time:
+                fn = detail.get("fn", "")
+                if not fn:
+                    raise RuntimeError(
+                        f"Cache metadata does not contain 'fn' for {path}"
+                    )
+                fn = os.path.join(self._storage[-1], fn)
+                expired_files.append(fn)
+                self.cached_files[-1].pop(path)
+
+        if self.cached_files[-1]:
+            cache_path = os.path.join(self._storage[-1], "cache")
+            self._save(self.cached_files[-1], cache_path)
+
+        writable_cache_empty = not self.cached_files[-1]
+        return expired_files, writable_cache_empty
+
+    def load(self) -> None:
+        """Load all metadata from disk and store in ``self.cached_files``"""
+        cached_files = []
+        for fn, _, _ in self._scan_locations():
+            if os.path.exists(fn):
+                # TODO: consolidate blocks here
+                loaded_cached_files = self._load(fn)
+                for c in loaded_cached_files.values():
+                    if isinstance(c["blocks"], list):
+                        c["blocks"] = set(c["blocks"])
+                cached_files.append(loaded_cached_files)
+            else:
+                cached_files.append({})
+        self.cached_files = cached_files or [{}]
+
+    def on_close_cached_file(self, f: Any, path: str) -> None:
+        """Perform side-effect actions on closing a cached file.
+
+        The actual closing of the file is the responsibility of the caller.
+        """
+        # File must be writable, so in self.cached_files[-1]
+        c = self.cached_files[-1][path]
+        if c["blocks"] is not True and len(c["blocks"]) * f.blocksize >= f.size:
+            c["blocks"] = True
+
+    def pop_file(self, path: str) -> str | None:
+        """Remove metadata of cached file.
+
+        If path is in the cache, return the filename of the cached file,
+        otherwise return ``None``. Caller is responsible for deleting the
+        cached file.
+        """
+        details = self.check_file(path, None)
+        if not details:
+            return None
+        _, fn = details
+        if fn.startswith(self._storage[-1]):
+            self.cached_files[-1].pop(path)
+            self.save()
+        else:
+            raise PermissionError(
+                "Can only delete cached file in last, writable cache location"
+            )
+        return fn
+
+    def save(self) -> None:
+        """Save metadata to disk"""
+        for (fn, _, writable), cache in zip(self._scan_locations(), self.cached_files):
+            if not writable:
+                continue
+
+            if os.path.exists(fn):
+                cached_files = self._load(fn)
+                for k, c in cached_files.items():
+                    if k in cache:
+                        if c["blocks"] is True or cache[k]["blocks"] is True:
+                            c["blocks"] = True
+                        else:
+                            # self.cached_files[*][*]["blocks"] must continue to
+                            # point to the same set object so that updates
+                            # performed by MMapCache are propagated back to
+                            # self.cached_files.
+                            blocks = cache[k]["blocks"]
+                            blocks.update(c["blocks"])
+                            c["blocks"] = blocks
+                        c["time"] = max(c["time"], cache[k]["time"])
+                        c["uid"] = cache[k]["uid"]
+
+                # Files can be added to cache after it was written once
+                for k, c in cache.items():
+                    if k not in cached_files:
+                        cached_files[k] = c
+            else:
+                cached_files = cache
+            cache = {k: v.copy() for k, v in cached_files.items()}
+            for c in cache.values():
+                if isinstance(c["blocks"], set):
+                    c["blocks"] = list(c["blocks"])
+            self._save(cache, fn)
+            self.cached_files[-1] = cached_files
+
+    def update_file(self, path: str, detail: Detail) -> None:
+        """Update metadata for specific file in memory, do not save"""
+        self.cached_files[-1][path] = detail
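For orientation, a hedged sketch of the metadata round-trip, assuming the class above is importable as fsspec.implementations.cache_metadata (the file path is not shown in this diff), the storage directory already exists, and the detail dict carries only the keys the class itself reads ("fn", "time", "uid", "blocks"):

import time
from fsspec.implementations.cache_metadata import CacheMetadata

meta = CacheMetadata(["/tmp/fsspec-cache"])  # last (here, only) directory is writable
meta.load()                                  # read any existing JSON or pickle metadata
meta.update_file(
    "s3://bucket/a/file.csv",
    {"fn": "abc123", "time": time.time(), "uid": "deadbeef", "blocks": True},
)
meta.save()                                  # merged with on-disk state, written as JSON via atomic_write
# Returns (detail, "/tmp/fsspec-cache/abc123") once the cached file exists on disk,
# otherwise False.
meta.check_file("s3://bucket/a/file.csv", None)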