fsspec 2025.3.2__py3-none-any.whl → 2025.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fsspec/__init__.py CHANGED
@@ -1,5 +1,3 @@
1
- from importlib.metadata import entry_points
2
-
3
1
  from . import caching
4
2
  from ._version import __version__ # noqa: F401
5
3
  from .callbacks import Callback
@@ -38,6 +36,10 @@ __all__ = [
38
36
 
39
37
 
40
38
  def process_entries():
39
+ try:
40
+ from importlib.metadata import entry_points
41
+ except ImportError:
42
+ return
41
43
  if entry_points is not None:
42
44
  try:
43
45
  eps = entry_points()
fsspec/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '2025.3.2'
21
- __version_tuple__ = version_tuple = (2025, 3, 2)
20
+ __version__ = version = '2025.5.1'
21
+ __version_tuple__ = version_tuple = (2025, 5, 1)
fsspec/generic.py CHANGED
@@ -16,15 +16,13 @@ logger = logging.getLogger("fsspec.generic")
16
16
 
17
17
 
18
18
  def set_generic_fs(protocol, **storage_options):
19
+ """Populate the dict used for method=="generic" lookups"""
19
20
  _generic_fs[protocol] = filesystem(protocol, **storage_options)
20
21
 
21
22
 
22
- default_method = "default"
23
-
24
-
25
- def _resolve_fs(url, method=None, protocol=None, storage_options=None):
23
+ def _resolve_fs(url, method, protocol=None, storage_options=None):
26
24
  """Pick instance of backend FS"""
27
- method = method or default_method
25
+ url = url[0] if isinstance(url, (list, tuple)) else url
28
26
  protocol = protocol or split_protocol(url)[0]
29
27
  storage_options = storage_options or {}
30
28
  if method == "default":
@@ -159,7 +157,7 @@ class GenericFileSystem(AsyncFileSystem):
159
157
 
160
158
  protocol = "generic" # there is no real reason to ever use a protocol with this FS
161
159
 
162
- def __init__(self, default_method="default", **kwargs):
160
+ def __init__(self, default_method="default", storage_options=None, **kwargs):
163
161
  """
164
162
 
165
163
  Parameters
@@ -171,22 +169,25 @@ class GenericFileSystem(AsyncFileSystem):
171
169
  configured via the config system
172
170
  - "generic": takes instances from the `_generic_fs` dict in this module,
173
171
  which you must populate before use. Keys are by protocol
172
+ - "options": expects storage_options, a dict mapping protocol to
173
+ kwargs to use when constructing the filesystem
174
174
  - "current": takes the most recently instantiated version of each FS
175
175
  """
176
176
  self.method = default_method
177
+ self.st_opts = storage_options
177
178
  super().__init__(**kwargs)
178
179
 
179
180
  def _parent(self, path):
180
- fs = _resolve_fs(path, self.method)
181
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
181
182
  return fs.unstrip_protocol(fs._parent(path))
182
183
 
183
184
  def _strip_protocol(self, path):
184
185
  # normalization only
185
- fs = _resolve_fs(path, self.method)
186
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
186
187
  return fs.unstrip_protocol(fs._strip_protocol(path))
187
188
 
188
189
  async def _find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
189
- fs = _resolve_fs(path, self.method)
190
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
190
191
  if fs.async_impl:
191
192
  out = await fs._find(
192
193
  path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
@@ -251,7 +252,7 @@ class GenericFileSystem(AsyncFileSystem):
251
252
  value,
252
253
  **kwargs,
253
254
  ):
254
- fs = _resolve_fs(path, self.method)
255
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
255
256
  if fs.async_impl:
256
257
  return await fs._pipe_file(path, value, **kwargs)
257
258
  else:
@@ -269,7 +270,7 @@ class GenericFileSystem(AsyncFileSystem):
269
270
 
270
271
  async def _makedirs(self, path, exist_ok=False):
271
272
  logger.debug("Make dir %s", path)
272
- fs = _resolve_fs(path, self.method)
273
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
273
274
  if fs.async_impl:
274
275
  await fs._makedirs(path, exist_ok=exist_ok)
275
276
  else:
@@ -288,6 +289,7 @@ class GenericFileSystem(AsyncFileSystem):
288
289
  url2,
289
290
  blocksize=2**20,
290
291
  callback=DEFAULT_CALLBACK,
292
+ tempdir: Optional[str] = None,
291
293
  **kwargs,
292
294
  ):
293
295
  fs = _resolve_fs(url, self.method)
@@ -295,35 +297,10 @@ class GenericFileSystem(AsyncFileSystem):
295
297
  if fs is fs2:
296
298
  # pure remote
297
299
  if fs.async_impl:
298
- return await fs._cp_file(url, url2, **kwargs)
300
+ return await fs._copy(url, url2, **kwargs)
299
301
  else:
300
- return fs.cp_file(url, url2, **kwargs)
301
- kw = {"blocksize": 0, "cache_type": "none"}
302
- try:
303
- f1 = (
304
- await fs.open_async(url, "rb")
305
- if hasattr(fs, "open_async")
306
- else fs.open(url, "rb", **kw)
307
- )
308
- callback.set_size(await maybe_await(f1.size))
309
- f2 = (
310
- await fs2.open_async(url2, "wb")
311
- if hasattr(fs2, "open_async")
312
- else fs2.open(url2, "wb", **kw)
313
- )
314
- while f1.size is None or f2.tell() < f1.size:
315
- data = await maybe_await(f1.read(blocksize))
316
- if f1.size is None and not data:
317
- break
318
- await maybe_await(f2.write(data))
319
- callback.absolute_update(f2.tell())
320
- finally:
321
- try:
322
- await maybe_await(f2.close())
323
- await maybe_await(f1.close())
324
- except NameError:
325
- # fail while opening f1 or f2
326
- pass
302
+ return fs.copy(url, url2, **kwargs)
303
+ await copy_file_op(fs, [url], fs2, [url2], tempdir, 1, on_error="raise")
327
304
 
328
305
  async def _make_many_dirs(self, urls, exist_ok=True):
329
306
  fs = _resolve_fs(urls[0], self.method)
@@ -347,17 +324,22 @@ class GenericFileSystem(AsyncFileSystem):
347
324
  tempdir: Optional[str] = None,
348
325
  **kwargs,
349
326
  ):
327
+ # TODO: special case for one FS being local, which can use get/put
328
+ # TODO: special case for one being memFS, which can use cat/pipe
350
329
  if recursive:
351
- raise NotImplementedError
352
- fs = _resolve_fs(path1[0], self.method)
353
- fs2 = _resolve_fs(path2[0], self.method)
354
- # not expanding paths atm., assume call is from rsync()
330
+ raise NotImplementedError("Please use fsspec.generic.rsync")
331
+ path1 = [path1] if isinstance(path1, str) else path1
332
+ path2 = [path2] if isinstance(path2, str) else path2
333
+
334
+ fs = _resolve_fs(path1, self.method)
335
+ fs2 = _resolve_fs(path2, self.method)
336
+
355
337
  if fs is fs2:
356
- # pure remote
357
338
  if fs.async_impl:
358
339
  return await fs._copy(path1, path2, **kwargs)
359
340
  else:
360
341
  return fs.copy(path1, path2, **kwargs)
342
+
361
343
  await copy_file_op(
362
344
  fs, path1, fs2, path2, tempdir, batch_size, on_error=on_error
363
345
  )
@@ -377,31 +359,33 @@ async def copy_file_op(
377
359
  fs2,
378
360
  u2,
379
361
  os.path.join(tempdir, uuid.uuid4().hex),
380
- on_error=on_error,
381
362
  )
382
363
  for u1, u2 in zip(url1, url2)
383
364
  ]
384
- await _run_coros_in_chunks(coros, batch_size=batch_size)
365
+ out = await _run_coros_in_chunks(
366
+ coros, batch_size=batch_size, return_exceptions=True
367
+ )
385
368
  finally:
386
369
  shutil.rmtree(tempdir)
370
+ if on_error == "return":
371
+ return out
372
+ elif on_error == "raise":
373
+ for o in out:
374
+ if isinstance(o, Exception):
375
+ raise o
387
376
 
388
377
 
389
378
  async def _copy_file_op(fs1, url1, fs2, url2, local, on_error="ignore"):
390
- ex = () if on_error == "raise" else Exception
391
- logger.debug("Copy %s -> %s", url1, url2)
392
- try:
393
- if fs1.async_impl:
394
- await fs1._get_file(url1, local)
395
- else:
396
- fs1.get_file(url1, local)
397
- if fs2.async_impl:
398
- await fs2._put_file(local, url2)
399
- else:
400
- fs2.put_file(local, url2)
401
- os.unlink(local)
402
- logger.debug("Copy %s -> %s; done", url1, url2)
403
- except ex as e:
404
- logger.debug("ignoring cp exception for %s: %s", url1, e)
379
+ if fs1.async_impl:
380
+ await fs1._get_file(url1, local)
381
+ else:
382
+ fs1.get_file(url1, local)
383
+ if fs2.async_impl:
384
+ await fs2._put_file(local, url2)
385
+ else:
386
+ fs2.put_file(local, url2)
387
+ os.unlink(local)
388
+ logger.debug("Copy %s -> %s; done", url1, url2)
405
389
 
406
390
 
407
391
  async def maybe_await(cor):
@@ -2,6 +2,7 @@ import asyncio
2
2
  import functools
3
3
  import inspect
4
4
 
5
+ import fsspec
5
6
  from fsspec.asyn import AsyncFileSystem, running_async
6
7
 
7
8
 
@@ -42,14 +43,24 @@ class AsyncFileSystemWrapper(AsyncFileSystem):
42
43
  The synchronous filesystem instance to wrap.
43
44
  """
44
45
 
45
- protocol = "async_wrapper"
46
+ protocol = "asyncwrapper", "async_wrapper"
46
47
  cachable = False
47
48
 
48
- def __init__(self, fs, *args, asynchronous=None, **kwargs):
49
+ def __init__(
50
+ self,
51
+ fs=None,
52
+ asynchronous=None,
53
+ target_protocol=None,
54
+ target_options=None,
55
+ **kwargs,
56
+ ):
49
57
  if asynchronous is None:
50
58
  asynchronous = running_async()
51
- super().__init__(*args, asynchronous=asynchronous, **kwargs)
52
- self.sync_fs = fs
59
+ super().__init__(asynchronous=asynchronous, **kwargs)
60
+ if fs is not None:
61
+ self.sync_fs = fs
62
+ else:
63
+ self.sync_fs = fsspec.filesystem(target_protocol, **target_options)
53
64
  self.protocol = self.sync_fs.protocol
54
65
  self._wrap_all_sync_methods()
55
66
 
@@ -335,6 +335,11 @@ class CachingFileSystem(AbstractFileSystem):
335
335
  self._metadata.update_file(path, detail)
336
336
  logger.debug("Creating local sparse file for %s", path)
337
337
 
338
+ # explicitly submitting the size to the open call will avoid extra
339
+ # operations when opening. This is particularly relevant
340
+ # for any file that is read over a network, e.g. S3.
341
+ size = detail.get("size", None)
342
+
338
343
  # call target filesystems open
339
344
  self._mkcache()
340
345
  f = self.fs._open(
@@ -344,8 +349,15 @@ class CachingFileSystem(AbstractFileSystem):
344
349
  autocommit=autocommit,
345
350
  cache_options=cache_options,
346
351
  cache_type="none",
352
+ size=size,
347
353
  **kwargs,
348
354
  )
355
+
356
+ # set size if not already set
357
+ if size is None:
358
+ detail["size"] = f.size
359
+ self._metadata.update_file(path, detail)
360
+
349
361
  if self.compression:
350
362
  comp = (
351
363
  infer_compression(path)
@@ -14,12 +14,13 @@ class DatabricksException(Exception):
14
14
  Helper class for exceptions raised in this module.
15
15
  """
16
16
 
17
- def __init__(self, error_code, message):
17
+ def __init__(self, error_code, message, details=None):
18
18
  """Create a new DatabricksException"""
19
19
  super().__init__(message)
20
20
 
21
21
  self.error_code = error_code
22
22
  self.message = message
23
+ self.details = details
23
24
 
24
25
 
25
26
  class DatabricksFileSystem(AbstractFileSystem):
@@ -80,7 +81,7 @@ class DatabricksFileSystem(AbstractFileSystem):
80
81
  raise FileNotFoundError(e.message) from e
81
82
 
82
83
  raise
83
- files = r["files"]
84
+ files = r.get("files", [])
84
85
  out = [
85
86
  {
86
87
  "name": o["path"],
@@ -0,0 +1,232 @@
1
+ import requests
2
+
3
+ from ..spec import AbstractFileSystem
4
+ from ..utils import infer_storage_options
5
+ from .memory import MemoryFile
6
+
7
+
8
+ class GistFileSystem(AbstractFileSystem):
9
+ """
10
+ Interface to files in a single GitHub Gist.
11
+
12
+ Provides read-only access to a gist's files. Gists do not contain
13
+ subdirectories, so file listing is straightforward.
14
+
15
+ Parameters
16
+ ----------
17
+ gist_id : str
18
+ The ID of the gist you want to access (the long hex value from the URL).
19
+ filenames : list[str] (optional)
20
+ If provided, only make a file system representing these files, and do not fetch
21
+ the list of all files for this gist.
22
+ sha : str (optional)
23
+ If provided, fetch a particular revision of the gist. If omitted,
24
+ the latest revision is used.
25
+ username : str (optional)
26
+ GitHub username for authentication (required if token is given).
27
+ token : str (optional)
28
+ GitHub personal access token (required if username is given).
29
+ timeout : (float, float) or float, optional
30
+ Connect and read timeouts for requests (default 60s each).
31
+ kwargs : dict
32
+ Stored on `self.request_kw` and passed to `requests.get` when fetching Gist
33
+ metadata or reading ("opening") a file.
34
+ """
35
+
36
+ protocol = "gist"
37
+ gist_url = "https://api.github.com/gists/{gist_id}"
38
+ gist_rev_url = "https://api.github.com/gists/{gist_id}/{sha}"
39
+
40
+ def __init__(
41
+ self,
42
+ gist_id,
43
+ filenames=None,
44
+ sha=None,
45
+ username=None,
46
+ token=None,
47
+ timeout=None,
48
+ **kwargs,
49
+ ):
50
+ super().__init__()
51
+ self.gist_id = gist_id
52
+ self.filenames = filenames
53
+ self.sha = sha # revision of the gist (optional)
54
+ if (username is None) ^ (token is None):
55
+ # Both or neither must be set
56
+ if username or token:
57
+ raise ValueError("Auth requires both username and token, or neither.")
58
+ self.username = username
59
+ self.token = token
60
+ self.request_kw = kwargs
61
+ # Default timeouts to 60s connect/read if none provided
62
+ self.timeout = timeout if timeout is not None else (60, 60)
63
+
64
+ # We use a single-level "directory" cache, because a gist is essentially flat
65
+ self.dircache[""] = self._fetch_file_list()
66
+
67
+ @property
68
+ def kw(self):
69
+ """Auth parameters passed to 'requests' if we have username/token."""
70
+ if self.username is not None and self.token is not None:
71
+ return {"auth": (self.username, self.token), **self.request_kw}
72
+ return self.request_kw
73
+
74
+ def _fetch_gist_metadata(self):
75
+ """
76
+ Fetch the JSON metadata for this gist (possibly for a specific revision).
77
+ """
78
+ if self.sha:
79
+ url = self.gist_rev_url.format(gist_id=self.gist_id, sha=self.sha)
80
+ else:
81
+ url = self.gist_url.format(gist_id=self.gist_id)
82
+
83
+ r = requests.get(url, timeout=self.timeout, **self.kw)
84
+ if r.status_code == 404:
85
+ raise FileNotFoundError(
86
+ f"Gist not found: {self.gist_id}@{self.sha or 'latest'}"
87
+ )
88
+ r.raise_for_status()
89
+ return r.json()
90
+
91
+ def _fetch_file_list(self):
92
+ """
93
+ Returns a list of dicts describing each file in the gist. These get stored
94
+ in self.dircache[""].
95
+ """
96
+ meta = self._fetch_gist_metadata()
97
+ if self.filenames:
98
+ available_files = meta.get("files", {})
99
+ files = {}
100
+ for fn in self.filenames:
101
+ if fn not in available_files:
102
+ raise FileNotFoundError(fn)
103
+ files[fn] = available_files[fn]
104
+ else:
105
+ files = meta.get("files", {})
106
+
107
+ out = []
108
+ for fname, finfo in files.items():
109
+ if finfo is None:
110
+ # Occasionally GitHub returns a file entry with null if it was deleted
111
+ continue
112
+ # Build a directory entry
113
+ out.append(
114
+ {
115
+ "name": fname, # file's name
116
+ "type": "file", # gists have no subdirectories
117
+ "size": finfo.get("size", 0), # file size in bytes
118
+ "raw_url": finfo.get("raw_url"),
119
+ }
120
+ )
121
+ return out
122
+
123
+ @classmethod
124
+ def _strip_protocol(cls, path):
125
+ """
126
+ Remove 'gist://' from the path, if present.
127
+ """
128
+ # The default infer_storage_options can handle gist://username:token@id/file
129
+ # or gist://id/file, but let's ensure we handle a normal usage too.
130
+ # We'll just strip the protocol prefix if it exists.
131
+ path = infer_storage_options(path).get("path", path)
132
+ return path.lstrip("/")
133
+
134
+ @staticmethod
135
+ def _get_kwargs_from_urls(path):
136
+ """
137
+ Parse 'gist://' style URLs into GistFileSystem constructor kwargs.
138
+ For example:
139
+ gist://:TOKEN@<gist_id>/file.txt
140
+ gist://username:TOKEN@<gist_id>/file.txt
141
+ """
142
+ so = infer_storage_options(path)
143
+ out = {}
144
+ if "username" in so and so["username"]:
145
+ out["username"] = so["username"]
146
+ if "password" in so and so["password"]:
147
+ out["token"] = so["password"]
148
+ if "host" in so and so["host"]:
149
+ # We interpret 'host' as the gist ID
150
+ out["gist_id"] = so["host"]
151
+
152
+ # Extract SHA and filename from path
153
+ if "path" in so and so["path"]:
154
+ path_parts = so["path"].rsplit("/", 2)[-2:]
155
+ if len(path_parts) == 2:
156
+ if path_parts[0]: # SHA present
157
+ out["sha"] = path_parts[0]
158
+ if path_parts[1]: # filename also present
159
+ out["filenames"] = [path_parts[1]]
160
+
161
+ return out
162
+
163
+ def ls(self, path="", detail=False, **kwargs):
164
+ """
165
+ List files in the gist. Gists are single-level, so any 'path' is basically
166
+ the filename, or empty for all files.
167
+
168
+ Parameters
169
+ ----------
170
+ path : str, optional
171
+ The filename to list. If empty, returns all files in the gist.
172
+ detail : bool, default False
173
+ If True, return a list of dicts; if False, return a list of filenames.
174
+ """
175
+ path = self._strip_protocol(path or "")
176
+ # If path is empty, return all
177
+ if path == "":
178
+ results = self.dircache[""]
179
+ else:
180
+ # We want just the single file with this name
181
+ all_files = self.dircache[""]
182
+ results = [f for f in all_files if f["name"] == path]
183
+ if not results:
184
+ raise FileNotFoundError(path)
185
+ if detail:
186
+ return results
187
+ else:
188
+ return sorted(f["name"] for f in results)
189
+
190
+ def _open(self, path, mode="rb", block_size=None, **kwargs):
191
+ """
192
+ Read a single file from the gist.
193
+ """
194
+ if mode != "rb":
195
+ raise NotImplementedError("GitHub Gist FS is read-only (no write).")
196
+
197
+ path = self._strip_protocol(path)
198
+ # Find the file entry in our dircache
199
+ matches = [f for f in self.dircache[""] if f["name"] == path]
200
+ if not matches:
201
+ raise FileNotFoundError(path)
202
+ finfo = matches[0]
203
+
204
+ raw_url = finfo.get("raw_url")
205
+ if not raw_url:
206
+ raise FileNotFoundError(f"No raw_url for file: {path}")
207
+
208
+ r = requests.get(raw_url, timeout=self.timeout, **self.kw)
209
+ if r.status_code == 404:
210
+ raise FileNotFoundError(path)
211
+ r.raise_for_status()
212
+ return MemoryFile(path, None, r.content)
213
+
214
+ def cat(self, path, recursive=False, on_error="raise", **kwargs):
215
+ """
216
+ Return {path: contents} for the given file or files. If 'recursive' is True,
217
+ and path is empty, returns all files in the gist.
218
+ """
219
+ paths = self.expand_path(path, recursive=recursive)
220
+ out = {}
221
+ for p in paths:
222
+ try:
223
+ with self.open(p, "rb") as f:
224
+ out[p] = f.read()
225
+ except FileNotFoundError as e:
226
+ if on_error == "raise":
227
+ raise e
228
+ elif on_error == "omit":
229
+ pass # skip
230
+ else:
231
+ out[p] = e
232
+ return out
@@ -1,4 +1,5 @@
1
1
  import base64
2
+ import re
2
3
 
3
4
  import requests
4
5
 
@@ -6,8 +7,6 @@ from ..spec import AbstractFileSystem
6
7
  from ..utils import infer_storage_options
7
8
  from .memory import MemoryFile
8
9
 
9
- # TODO: add GIST backend, would be very similar
10
-
11
10
 
12
11
  class GithubFileSystem(AbstractFileSystem):
13
12
  """Interface to files in github
@@ -265,3 +264,70 @@ class GithubFileSystem(AbstractFileSystem):
265
264
  cache_options=cache_options,
266
265
  **kwargs,
267
266
  )
267
+
268
+ def rm(self, path, recursive=False, maxdepth=None, message=None):
269
+ path = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
270
+ for p in reversed(path):
271
+ self.rm_file(p, message=message)
272
+
273
+ def rm_file(self, path, message=None, **kwargs):
274
+ """
275
+ Remove a file from a specified branch using a given commit message.
276
+
277
+ Since Github DELETE operation requires a branch name, and we can't reliably
278
+ determine whether the provided SHA refers to a branch, tag, or commit, we
279
+ assume it's a branch. If it's not, the user will encounter an error when
280
+ attempting to retrieve the file SHA or delete the file.
281
+
282
+ Parameters
283
+ ----------
284
+ path: str
285
+ The file's location relative to the repository root.
286
+ message: str, optional
287
+ The commit message for the deletion.
288
+ """
289
+
290
+ if not self.username:
291
+ raise ValueError("Authentication required")
292
+
293
+ path = self._strip_protocol(path)
294
+
295
+ # Attempt to get SHA from cache or Github API
296
+ sha = self._get_sha_from_cache(path)
297
+ if not sha:
298
+ url = self.content_url.format(
299
+ org=self.org, repo=self.repo, path=path.lstrip("/"), sha=self.root
300
+ )
301
+ r = requests.get(url, timeout=self.timeout, **self.kw)
302
+ if r.status_code == 404:
303
+ raise FileNotFoundError(path)
304
+ r.raise_for_status()
305
+ sha = r.json()["sha"]
306
+
307
+ # Delete the file
308
+ delete_url = self.content_url.format(
309
+ org=self.org, repo=self.repo, path=path, sha=self.root
310
+ )
311
+ branch = self.root
312
+ data = {
313
+ "message": message or f"Delete {path}",
314
+ "sha": sha,
315
+ **({"branch": branch} if branch else {}),
316
+ }
317
+
318
+ r = requests.delete(delete_url, json=data, timeout=self.timeout, **self.kw)
319
+ error_message = r.json().get("message", "")
320
+ if re.search(r"Branch .+ not found", error_message):
321
+ error = "Remove only works when the filesystem is initialised from a branch or default (None)"
322
+ raise ValueError(error)
323
+ r.raise_for_status()
324
+
325
+ self.invalidate_cache(path)
326
+
327
+ def _get_sha_from_cache(self, path):
328
+ for entries in self.dircache.values():
329
+ for entry in entries:
330
+ entry_path = entry.get("name")
331
+ if entry_path and entry_path == path and "sha" in entry:
332
+ return entry["sha"]
333
+ return None
@@ -6,6 +6,7 @@ import os.path as osp
6
6
  import shutil
7
7
  import stat
8
8
  import tempfile
9
+ from functools import lru_cache
9
10
 
10
11
  from fsspec import AbstractFileSystem
11
12
  from fsspec.compression import compr
@@ -154,7 +155,11 @@ class LocalFileSystem(AbstractFileSystem):
154
155
  def put_file(self, path1, path2, callback=None, **kwargs):
155
156
  return self.cp_file(path1, path2, **kwargs)
156
157
 
157
- def mv(self, path1, path2, **kwargs):
158
+ def mv(self, path1, path2, recursive: bool = True, **kwargs):
159
+ """Move files/directories
160
+ For the specific case of local, all ops on directories are recursive and
161
+ the recursive= kwarg is ignored.
162
+ """
158
163
  path1 = self._strip_protocol(path1)
159
164
  path2 = self._strip_protocol(path2)
160
165
  shutil.move(path1, path2)
@@ -350,6 +355,19 @@ def trailing_sep(path):
350
355
  return path.endswith(os.sep) or (os.altsep is not None and path.endswith(os.altsep))
351
356
 
352
357
 
358
+ @lru_cache(maxsize=1)
359
+ def get_umask(mask: int = 0o666) -> int:
360
+ """Get the current umask.
361
+
362
+ Follows https://stackoverflow.com/a/44130549 to get the umask.
363
+ Temporarily sets the umask to the given value, and then resets it to the
364
+ original value.
365
+ """
366
+ value = os.umask(mask)
367
+ os.umask(value)
368
+ return value
369
+
370
+
353
371
  class LocalFileOpener(io.IOBase):
354
372
  def __init__(
355
373
  self, path, mode, autocommit=True, fs=None, compression=None, **kwargs
@@ -412,7 +430,22 @@ class LocalFileOpener(io.IOBase):
412
430
  def commit(self):
413
431
  if self.autocommit:
414
432
  raise RuntimeError("Can only commit if not already set to autocommit")
415
- shutil.move(self.temp, self.path)
433
+ try:
434
+ shutil.move(self.temp, self.path)
435
+ except PermissionError as e:
436
+ # shutil.move raises PermissionError if os.rename
437
+ # and the default copy2 fallback with shutil.copystats fail.
438
+ # The file should be there nonetheless, but without copied permissions.
439
+ # If it doesn't exist, there was no permission to create the file.
440
+ if not os.path.exists(self.path):
441
+ raise e
442
+ else:
443
+ # If PermissionError is not raised, permissions can be set.
444
+ try:
445
+ mask = 0o666
446
+ os.chmod(self.path, mask & ~get_umask(mask))
447
+ except RuntimeError:
448
+ pass
416
449
 
417
450
  def discard(self):
418
451
  if self.autocommit:
fsspec/registry.py CHANGED
@@ -72,13 +72,13 @@ known_implementations = {
72
72
  "class": "fsspec.implementations.arrow.HadoopFileSystem",
73
73
  "err": "pyarrow and local java libraries required for HDFS",
74
74
  },
75
- "async_wrapper": {
76
- "class": "fsspec.asyn_wrapper.AsyncWrapperFileSystem",
77
- },
78
75
  "asynclocal": {
79
76
  "class": "morefs.asyn_local.AsyncLocalFileSystem",
80
77
  "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
81
78
  },
79
+ "asyncwrapper": {
80
+ "class": "fsspec.implementations.asyn_wrapper.AsyncFileSystemWrapper",
81
+ },
82
82
  "az": {
83
83
  "class": "adlfs.AzureBlobFileSystem",
84
84
  "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
@@ -122,6 +122,10 @@ known_implementations = {
122
122
  "err": "Please install gdrivefs for access to Google Drive",
123
123
  },
124
124
  "generic": {"class": "fsspec.generic.GenericFileSystem"},
125
+ "gist": {
126
+ "class": "fsspec.implementations.gist.GistFileSystem",
127
+ "err": "Install the requests package to use the gist FS",
128
+ },
125
129
  "git": {
126
130
  "class": "fsspec.implementations.git.GitFileSystem",
127
131
  "err": "Install pygit2 to browse local git repos",
@@ -180,6 +184,10 @@ known_implementations = {
180
184
  "class": "ossfs.OSSFileSystem",
181
185
  "err": "Install ossfs to access Alibaba Object Storage System",
182
186
  },
187
+ "pyscript": {
188
+ "class": "pyscript_fsspec_client.client.PyscriptFileSystem",
189
+ "err": "Install requests (cpython) or run in pyscript",
190
+ },
183
191
  "reference": {"class": "fsspec.implementations.reference.ReferenceFileSystem"},
184
192
  "root": {
185
193
  "class": "fsspec_xrootd.XRootDFileSystem",
fsspec/spec.py CHANGED
@@ -548,17 +548,45 @@ class AbstractFileSystem(metaclass=_Cached):
548
548
  return sizes
549
549
 
550
550
  def glob(self, path, maxdepth=None, **kwargs):
551
- """
552
- Find files by glob-matching.
551
+ """Find files by glob-matching.
553
552
 
554
- If the path ends with '/', only folders are returned.
553
+ Pattern matching capabilities for finding files that match the given pattern.
555
554
 
556
- We support ``"**"``,
557
- ``"?"`` and ``"[..]"``. We do not support ^ for pattern negation.
555
+ Parameters
556
+ ----------
557
+ path: str
558
+ The glob pattern to match against
559
+ maxdepth: int or None
560
+ Maximum depth for ``'**'`` patterns. Applied on the first ``'**'`` found.
561
+ Must be at least 1 if provided.
562
+ kwargs:
563
+ Additional arguments passed to ``find`` (e.g., detail=True)
558
564
 
559
- The `maxdepth` option is applied on the first `**` found in the path.
565
+ Returns
566
+ -------
567
+ List of matched paths, or dict of paths and their info if detail=True
560
568
 
561
- kwargs are passed to ``ls``.
569
+ Notes
570
+ -----
571
+ Supported patterns:
572
+ - '*': Matches any sequence of characters within a single directory level
573
+ - ``'**'``: Matches any number of directory levels (must be an entire path component)
574
+ - '?': Matches exactly one character
575
+ - '[abc]': Matches any character in the set
576
+ - '[a-z]': Matches any character in the range
577
+ - '[!abc]': Matches any character NOT in the set
578
+
579
+ Special behaviors:
580
+ - If the path ends with '/', only folders are returned
581
+ - Consecutive '*' characters are compressed into a single '*'
582
+ - Empty brackets '[]' never match anything
583
+ - Negated empty brackets '[!]' match any single character
584
+ - Special characters in character classes are escaped properly
585
+
586
+ Limitations:
587
+ - ``'**'`` must be a complete path component (e.g., ``'a/**/b'``, not ``'a**b'``)
588
+ - No brace expansion ('{a,b}.txt')
589
+ - No extended glob patterns ('+(pattern)', '!(pattern)')
562
590
  """
563
591
  if maxdepth is not None and maxdepth < 1:
564
592
  raise ValueError("maxdepth must be at least 1")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fsspec
3
- Version: 2025.3.2
3
+ Version: 2025.5.1
4
4
  Summary: File-system specification
5
5
  Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
6
6
  Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
@@ -1,5 +1,5 @@
1
- fsspec/__init__.py,sha256=l9MJaNNV2d4wKpCtMvXDr55n92DkdrAayGy3F9ICjzk,1998
2
- fsspec/_version.py,sha256=Qu6vPGmiev5F31vB8drXeCnOmd04CpjXkul_ZRHIKdg,517
1
+ fsspec/__init__.py,sha256=L7qwNBU1iMNQd8Of87HYSNFT9gWlNMSESaJC8fY0AaQ,2053
2
+ fsspec/_version.py,sha256=wECC04X-mZMQuTtKrW1mQDmwI4m9ylB6qWwnMihRB6I,517
3
3
  fsspec/archive.py,sha256=vM6t_lgV6lBWbBYwpm3S4ofBQFQxUPr5KkDQrrQcQro,2411
4
4
  fsspec/asyn.py,sha256=VJ2jBdYgUjV4_dETpKeCp2wF1XHAdeUET95d2HqNZck,36776
5
5
  fsspec/caching.py,sha256=n_SbdT-l92Kqo3e1BQgef0uEWJD0raP5-Qd8Ewp8CHY,34292
@@ -11,33 +11,34 @@ fsspec/core.py,sha256=1tLctwr7sF1VO3djc_UkjhJ8IAEy0TUMH_bb07Sw17E,23828
11
11
  fsspec/dircache.py,sha256=YzogWJrhEastHU7vWz-cJiJ7sdtLXFXhEpInGKd4EcM,2717
12
12
  fsspec/exceptions.py,sha256=pauSLDMxzTJMOjvX1WEUK0cMyFkrFxpWJsyFywav7A8,331
13
13
  fsspec/fuse.py,sha256=Q-3NOOyLqBfYa4Db5E19z_ZY36zzYHtIs1mOUasItBQ,10177
14
- fsspec/generic.py,sha256=AFbo-mHBt5QJV1Aplg5CJuUiiJ4bNQhcKRuwkZJdWac,13761
14
+ fsspec/generic.py,sha256=6NIZX_Un78zkTtw46RoYubDPg_gPgO1blsjxytAVzu4,13449
15
15
  fsspec/gui.py,sha256=xBnHL2-r0LVwhDAtnHoPpXts7jd4Z32peawCJiI-7lI,13975
16
16
  fsspec/json.py,sha256=65sQ0Y7mTj33u_Y4IId5up4abQ3bAel4E4QzbKMiQSg,3826
17
17
  fsspec/mapping.py,sha256=m2ndB_gtRBXYmNJg0Ie1-BVR75TFleHmIQBzC-yWhjU,8343
18
18
  fsspec/parquet.py,sha256=6ibAmG527L5JNFS0VO8BDNlxHdA3bVYqdByeiFgpUVM,19448
19
- fsspec/registry.py,sha256=5kz-61fkb29lgSLzXWvk67ft6ooeYJR24Hs1583rD8w,11570
20
- fsspec/spec.py,sha256=l7ZEbgLsnrFuS-yrGl9re6ia1Yts1_10RqGV_mT-5P8,76032
19
+ fsspec/registry.py,sha256=iPIyCIDcSKxgX7ppEFKHEmTubt8Z-YYpN0Eb3K94S3k,11893
20
+ fsspec/spec.py,sha256=7cOUe5PC5Uyf56HtGBUHEoym8ktPj-BI8G4HR8Xd_C8,77298
21
21
  fsspec/transaction.py,sha256=xliRG6U2Zf3khG4xcw9WiB-yAoqJSHEGK_VjHOdtgo0,2398
22
22
  fsspec/utils.py,sha256=A11t25RnpiQ30RO6xeR0Qqlu3fGj8bnc40jg08tlYSI,22980
23
23
  fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  fsspec/implementations/arrow.py,sha256=721Dikne_lV_0tlgk9jyKmHL6W-5MT0h2LKGvOYQTPI,8623
25
- fsspec/implementations/asyn_wrapper.py,sha256=PNkYdHiLVWwk-GJok5O6dTnhPwDaSU9QTtBTE9CIRec,3082
25
+ fsspec/implementations/asyn_wrapper.py,sha256=uZROca8lqiGOf5EILoAUjfalWoUU5CtseiE46l_3lkQ,3326
26
26
  fsspec/implementations/cache_mapper.py,sha256=W4wlxyPxZbSp9ItJ0pYRVBMh6bw9eFypgP6kUYuuiI4,2421
27
27
  fsspec/implementations/cache_metadata.py,sha256=pcOJYcBQY5OaC7Yhw0F3wjg08QLYApGmoISCrbs59ks,8511
28
- fsspec/implementations/cached.py,sha256=2EqeHZi6TKZzr222ZyXHjuYBnx_1g_HWXj1CJBFsXbc,33173
28
+ fsspec/implementations/cached.py,sha256=FKEstAQxn5CyA5yM8NSl154ffm_k2wQ6_Za6C7ygIWg,33592
29
29
  fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
30
30
  fsspec/implementations/data.py,sha256=LDLczxRh8h7x39Zjrd-GgzdQHr78yYxDlrv2C9Uxb5E,1658
31
- fsspec/implementations/dbfs.py,sha256=XwpotuS_ncz3XK1dkUteww9GnTja7HoY91c0m4GUfwI,15092
31
+ fsspec/implementations/dbfs.py,sha256=2Bp-0m9SqlaroDa0KbXxb5BobCyBJ7_5YQBISf3fxbQ,15145
32
32
  fsspec/implementations/dirfs.py,sha256=f1sGnQ9Vf0xTxrXo4jDeBy4Qfq3RTqAEemqBSeb0hwY,12108
33
33
  fsspec/implementations/ftp.py,sha256=sorsczLp_2J3ukONsbZY-11sRZP6H5a3V7XXf6o6ip0,11936
34
+ fsspec/implementations/gist.py,sha256=Ost985hmFr50KsA-QD0shY3hP4KX5qJ9rb5C-X4ehK8,8341
34
35
  fsspec/implementations/git.py,sha256=4SElW9U5d3k3_ITlvUAx59Yk7XLNRTqkGa2C3hCUkWM,3754
35
- fsspec/implementations/github.py,sha256=tvLepeDtBsFHYocsD7XS1W6r8wxaoK57Us3kHGisnjU,9146
36
+ fsspec/implementations/github.py,sha256=aCsZL8UvXZgdkcB1RUs3DdLeNrjLKcFsFYeQFDWbBFo,11653
36
37
  fsspec/implementations/http.py,sha256=_gLt0yGbVOYWvE9pK81WCC-3TgbOMOKJYllBU72ALo8,30138
37
38
  fsspec/implementations/http_sync.py,sha256=UydDqSdUBdhiJ1KufzV8rKGrTftFR4QmNV0safILb8g,30133
38
39
  fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ibN4Pg,3811
39
40
  fsspec/implementations/libarchive.py,sha256=5_I2DiLXwQ1JC8x-K7jXu-tBwhO9dj7tFLnb0bTnVMQ,7102
40
- fsspec/implementations/local.py,sha256=g2iK8uWPGkSiI6bwmnIRXhJMQvTegCmXZ8Kb8ojhvAo,15543
41
+ fsspec/implementations/local.py,sha256=38ylhASzWXSF0X41Bw8pYXyiM8xVu4UWDgE7l3Em8Uc,16768
41
42
  fsspec/implementations/memory.py,sha256=cLNrK9wk97sl4Tre9uVDXWj6mEHvvVVIgaVgNA5KVIg,10527
42
43
  fsspec/implementations/reference.py,sha256=t23prs_5ugXJnYhLxLlPLPyagrx4_ofZWR_oyX9wd3Q,48703
43
44
  fsspec/implementations/sftp.py,sha256=fMY9XZcmpjszQ2tCqO_TPaJesaeD_Dv7ptYzgUPGoO0,5631
@@ -53,7 +54,7 @@ fsspec/tests/abstract/mv.py,sha256=k8eUEBIrRrGMsBY5OOaDXdGnQUKGwDIfQyduB6YD3Ns,1
53
54
  fsspec/tests/abstract/open.py,sha256=Fi2PBPYLbRqysF8cFm0rwnB41kMdQVYjq8cGyDXp3BU,329
54
55
  fsspec/tests/abstract/pipe.py,sha256=LFzIrLCB5GLXf9rzFKJmE8AdG7LQ_h4bJo70r8FLPqM,402
55
56
  fsspec/tests/abstract/put.py,sha256=7aih17OKB_IZZh1Mkq1eBDIjobhtMQmI8x-Pw-S_aZk,21201
56
- fsspec-2025.3.2.dist-info/METADATA,sha256=4x2UMMjzVQTh6svjBem1yKpfCTuDqG-bSUWZiWRCFPg,11697
57
- fsspec-2025.3.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
58
- fsspec-2025.3.2.dist-info/licenses/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
59
- fsspec-2025.3.2.dist-info/RECORD,,
57
+ fsspec-2025.5.1.dist-info/METADATA,sha256=pL2uvv5MW4GapXukzVwdZGe3ghW4KRvh75lvDXTkMl4,11697
58
+ fsspec-2025.5.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
59
+ fsspec-2025.5.1.dist-info/licenses/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
60
+ fsspec-2025.5.1.dist-info/RECORD,,