fsspec-2025.7.0-py3-none-any.whl → fsspec-2025.10.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fsspec/_version.py CHANGED
@@ -1,7 +1,14 @@
  # file generated by setuptools-scm
  # don't change, don't track in version control

- __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
+ __all__ = [
+ "__version__",
+ "__version_tuple__",
+ "version",
+ "version_tuple",
+ "__commit_id__",
+ "commit_id",
+ ]

  TYPE_CHECKING = False
  if TYPE_CHECKING:
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
  from typing import Union

  VERSION_TUPLE = Tuple[Union[int, str], ...]
+ COMMIT_ID = Union[str, None]
  else:
  VERSION_TUPLE = object
+ COMMIT_ID = object

  version: str
  __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE
+ commit_id: COMMIT_ID
+ __commit_id__: COMMIT_ID

- __version__ = version = '2025.7.0'
- __version_tuple__ = version_tuple = (2025, 7, 0)
+ __version__ = version = '2025.10.0'
+ __version_tuple__ = version_tuple = (2025, 10, 0)
+
+ __commit_id__ = commit_id = None
fsspec/conftest.py CHANGED
@@ -3,11 +3,12 @@ import shutil
  import subprocess
  import sys
  import time
+ from collections import deque
+ from collections.abc import Generator, Sequence

  import pytest

  import fsspec
- from fsspec.implementations.cached import CachingFileSystem


  @pytest.fixture()
@@ -27,16 +28,85 @@ def m():
  m.pseudo_dirs.append("")


- @pytest.fixture
+ class InstanceCacheInspector:
+ """
+ Helper class to inspect instance caches of filesystem classes in tests.
+ """
+
+ def clear(self) -> None:
+ """
+ Clear instance caches of all currently imported filesystem classes.
+ """
+ classes = deque([fsspec.spec.AbstractFileSystem])
+ while classes:
+ cls = classes.popleft()
+ cls.clear_instance_cache()
+ classes.extend(cls.__subclasses__())
+
+ def gather_counts(self, *, omit_zero: bool = True) -> dict[str, int]:
+ """
+ Gather counts of filesystem instances in the instance caches
+ of all currently imported filesystem classes.
+
+ Parameters
+ ----------
+ omit_zero:
+ Whether to omit instance types with no cached instances.
+ """
+ out: dict[str, int] = {}
+ classes = deque([fsspec.spec.AbstractFileSystem])
+ while classes:
+ cls = classes.popleft()
+ count = len(cls._cache) # there is no public interface for the cache
+ # note: skip intermediate AbstractFileSystem subclasses
+ # if they proxy the protocol attribute via a property.
+ if isinstance(cls.protocol, (Sequence, str)):
+ key = cls.protocol if isinstance(cls.protocol, str) else cls.protocol[0]
+ if count or not omit_zero:
+ out[key] = count
+ classes.extend(cls.__subclasses__())
+ return out
+
+
+ @pytest.fixture(scope="function", autouse=True)
+ def instance_caches() -> Generator[InstanceCacheInspector, None, None]:
+ """
+ Fixture to ensure empty filesystem instance caches before and after a test.
+
+ Used by default for all tests.
+ Clears caches of all imported filesystem classes.
+ Can be used to write test assertions about instance caches.
+
+ Usage:
+
+ def test_something(instance_caches):
+ # Test code here
+ fsspec.open("file://abc")
+ fsspec.open("memory://foo/bar")
+
+ # Test assertion
+ assert instance_caches.gather_counts() == {"file": 1, "memory": 1}
+
+ Returns
+ -------
+ instance_caches: An instance cache inspector for clearing and inspecting caches.
+ """
+ ic = InstanceCacheInspector()
+
+ ic.clear()
+ try:
+ yield ic
+ finally:
+ ic.clear()
+
+
+ @pytest.fixture(scope="function")
  def ftp_writable(tmpdir):
  """
  Fixture providing a writable FTP filesystem.
  """
  pytest.importorskip("pyftpdlib")
- from fsspec.implementations.ftp import FTPFileSystem

- FTPFileSystem.clear_instance_cache() # remove lingering connections
- CachingFileSystem.clear_instance_cache()
  d = str(tmpdir)
  with open(os.path.join(d, "out"), "wb") as f:
  f.write(b"hello" * 10000)
fsspec/core.py CHANGED
@@ -330,7 +330,7 @@ def open_files(

  def _un_chain(path, kwargs):
  # Avoid a circular import
- from fsspec.implementations.cached import CachingFileSystem
+ from fsspec.implementations.chained import ChainedFileSystem

  if "::" in path:
  x = re.compile(".*[^a-z]+.*") # test for non protocol-like single word
@@ -358,7 +358,7 @@ def _un_chain(path, kwargs):
  **kws,
  )
  bit = cls._strip_protocol(bit)
- if "target_protocol" not in kw and issubclass(cls, CachingFileSystem):
+ if "target_protocol" not in kw and issubclass(cls, ChainedFileSystem):
  bit = previous_bit
  out.append((bit, protocol, kw))
  previous_bit = bit
fsspec/generic.py CHANGED
@@ -118,6 +118,8 @@ def rsync(
  if otherfile in otherfiles:
  if update_cond == "always":
  allfiles[k] = otherfile
+ elif update_cond == "never":
+ allfiles.pop(k)
  elif update_cond == "different":
  inf1 = source_field(v) if callable(source_field) else v[source_field]
  v2 = otherfiles[otherfile]
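The new "never" branch removes from the copy list any file that already exists at the destination, so existing files are left untouched. A minimal sketch of using the option; the local and in-memory paths are placeholders and must exist for the call to do anything:

    import fsspec
    from fsspec.generic import rsync

    # Copy a tree into the in-memory filesystem, but never overwrite
    # files that already exist at the destination (paths are illustrative).
    rsync("file:///tmp/source", "memory://mirror", update_cond="never")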
fsspec/implementations/arrow.py CHANGED
@@ -75,10 +75,13 @@ class ArrowFSWrapper(AbstractFileSystem):
  path = self._strip_protocol(path)
  from pyarrow.fs import FileSelector

- entries = [
- self._make_entry(entry)
- for entry in self.fs.get_file_info(FileSelector(path))
- ]
+ try:
+ entries = [
+ self._make_entry(entry)
+ for entry in self.fs.get_file_info(FileSelector(path))
+ ]
+ except (FileNotFoundError, NotADirectoryError):
+ entries = [self.info(path, **kwargs)]
  if detail:
  return entries
  else:
fsspec/implementations/asyn_wrapper.py CHANGED
@@ -6,7 +6,7 @@ import fsspec
  from fsspec.asyn import AsyncFileSystem, running_async


- def async_wrapper(func, obj=None):
+ def async_wrapper(func, obj=None, semaphore=None):
  """
  Wraps a synchronous function to make it awaitable.

@@ -16,6 +16,8 @@ def async_wrapper(func, obj=None):
  The synchronous function to wrap.
  obj : object, optional
  The instance to bind the function to, if applicable.
+ semaphore : asyncio.Semaphore, optional
+ A semaphore to limit concurrent calls.

  Returns
  -------
@@ -25,6 +27,9 @@ def async_wrapper(func, obj=None):

  @functools.wraps(func)
  async def wrapper(*args, **kwargs):
+ if semaphore:
+ async with semaphore:
+ return await asyncio.to_thread(func, *args, **kwargs)
  return await asyncio.to_thread(func, *args, **kwargs)

  return wrapper
@@ -52,6 +57,8 @@ class AsyncFileSystemWrapper(AsyncFileSystem):
  asynchronous=None,
  target_protocol=None,
  target_options=None,
+ semaphore=None,
+ max_concurrent_tasks=None,
  **kwargs,
  ):
  if asynchronous is None:
@@ -62,6 +69,7 @@ class AsyncFileSystemWrapper(AsyncFileSystem):
  else:
  self.sync_fs = fsspec.filesystem(target_protocol, **target_options)
  self.protocol = self.sync_fs.protocol
+ self.semaphore = semaphore
  self._wrap_all_sync_methods()

  @property
@@ -83,7 +91,7 @@ class AsyncFileSystemWrapper(AsyncFileSystem):

  method = getattr(self.sync_fs, method_name)
  if callable(method) and not inspect.iscoroutinefunction(method):
- async_method = async_wrapper(method, obj=self)
+ async_method = async_wrapper(method, obj=self, semaphore=self.semaphore)
  setattr(self, f"_{method_name}", async_method)

  @classmethod
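A sketch of how the new semaphore argument might be used to bound how many wrapped sync calls run in worker threads at once; the limit of 8 and the local target path are illustrative:

    import asyncio
    import fsspec
    from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper

    async def main():
        # At most 8 wrapped sync calls are dispatched to threads concurrently.
        sem = asyncio.Semaphore(8)
        fs = AsyncFileSystemWrapper(
            fsspec.filesystem("file"), asynchronous=True, semaphore=sem
        )
        return await fs._cat_file("/etc/hostname")  # placeholder path

    asyncio.run(main())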
fsspec/implementations/cached.py CHANGED
@@ -9,13 +9,14 @@ import weakref
  from shutil import rmtree
  from typing import TYPE_CHECKING, Any, Callable, ClassVar

- from fsspec import AbstractFileSystem, filesystem
+ from fsspec import filesystem
  from fsspec.callbacks import DEFAULT_CALLBACK
  from fsspec.compression import compr
  from fsspec.core import BaseCache, MMapCache
  from fsspec.exceptions import BlocksizeMismatchError
  from fsspec.implementations.cache_mapper import create_cache_mapper
  from fsspec.implementations.cache_metadata import CacheMetadata
+ from fsspec.implementations.chained import ChainedFileSystem
  from fsspec.implementations.local import LocalFileSystem
  from fsspec.spec import AbstractBufferedFile
  from fsspec.transaction import Transaction
@@ -39,7 +40,7 @@ class WriteCachedTransaction(Transaction):
  self.fs = None # break cycle


- class CachingFileSystem(AbstractFileSystem):
+ class CachingFileSystem(ChainedFileSystem):
  """Locally caching filesystem, layer over any other FS

  This class implements chunk-wise local storage of remote files, for quick
@@ -60,6 +61,7 @@ class CachingFileSystem(AbstractFileSystem):
  """

  protocol: ClassVar[str | tuple[str, ...]] = ("blockcache", "cached")
+ _strip_tokenize_options = ("fo",)

  def __init__(
  self,
@@ -478,7 +480,7 @@ class CachingFileSystem(AbstractFileSystem):
  if item in ["transaction"]:
  # property
  return type(self).transaction.__get__(self)
- if item in ["_cache", "transaction_type"]:
+ if item in {"_cache", "transaction_type", "protocol"}:
  # class attributes
  return getattr(type(self), item)
  if item == "__class__":
@@ -886,6 +888,7 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
  rpaths = [p for l, p in zip(lpaths, paths) if l is False]
  lpaths = [l for l, p in zip(lpaths, paths) if l is False]
  self.fs.get(rpaths, lpaths)
+ paths = [self._check_file(p) for p in paths]
  return LocalFileSystem().cat_ranges(
  paths, starts, ends, max_gap=max_gap, on_error=on_error, **kwargs
  )
@@ -983,7 +986,9 @@ class LocalTempFile:
  os.remove(self.fn)

  def commit(self):
- self.fs.put(self.fn, self.path, **self.kwargs)
+ # calling put() with list arguments avoids path expansion and additional operations
+ # like isdir()
+ self.fs.put([self.fn], [self.path], **self.kwargs)
  # we do not delete the local copy, it's still in the cache.

  @property
fsspec/implementations/chained.py ADDED
@@ -0,0 +1,23 @@
+ from typing import ClassVar
+
+ from fsspec import AbstractFileSystem
+
+ __all__ = ("ChainedFileSystem",)
+
+
+ class ChainedFileSystem(AbstractFileSystem):
+ """Chained filesystem base class.
+
+ A chained filesystem is designed to be layered over another FS.
+ This is useful to implement things like caching.
+
+ This base class does very little on its own, but is used as a marker
+ that the class is designed for chaining.
+
+ Right now this is only used in `url_to_fs` to provide the path argument
+ (`fo`) to the chained filesystem from the underlying filesystem.
+
+ Additional functionality may be added in the future.
+ """
+
+ protocol: ClassVar[str] = "chained"
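This marker class is what _un_chain in fsspec/core.py now tests for, in place of CachingFileSystem, when deciding whether a component of a chained URL such as simplecache::s3://bucket/key should receive the remainder of the chain as its target path. A minimal illustration of the relationship, using only classes that appear in this diff:

    from fsspec.implementations.cached import CachingFileSystem, SimpleCacheFileSystem
    from fsspec.implementations.chained import ChainedFileSystem

    # Caching filesystems now derive from the chaining marker base class,
    # so _un_chain treats them as layers over the protocol that follows them.
    assert issubclass(CachingFileSystem, ChainedFileSystem)
    assert issubclass(SimpleCacheFileSystem, ChainedFileSystem)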
fsspec/implementations/dbfs.py CHANGED
@@ -1,9 +1,11 @@
+ from __future__ import annotations
+
  import base64
  import urllib

  import requests
- import requests.exceptions
  from requests.adapters import HTTPAdapter, Retry
+ from typing_extensions import override

  from fsspec import AbstractFileSystem
  from fsspec.spec import AbstractBufferedFile
@@ -57,6 +59,24 @@ class DatabricksFileSystem(AbstractFileSystem):

  super().__init__(**kwargs)

+ @override
+ def _ls_from_cache(self, path) -> list[dict[str, str | int]] | None:
+ """Check cache for listing
+
+ Returns listing, if found (may be empty list for a directory that
+ exists but contains nothing), None if not in cache.
+ """
+ self.dircache.pop(path.rstrip("/"), None)
+
+ parent = self._parent(path)
+ if parent in self.dircache:
+ for entry in self.dircache[parent]:
+ if entry["name"] == path.rstrip("/"):
+ if entry["type"] != "directory":
+ return [entry]
+ return []
+ raise FileNotFoundError(path)
+
  def ls(self, path, detail=True, **kwargs):
  """
  List the contents of the given path.
@@ -70,7 +90,15 @@ class DatabricksFileSystem(AbstractFileSystem):
  but also additional information on file sizes
  and types.
  """
- out = self._ls_from_cache(path)
+ try:
+ out = self._ls_from_cache(path)
+ except FileNotFoundError:
+ # This happens if the `path`'s parent was cached, but `path` is not
+ # there. This suggests that `path` is new since the parent was
+ # cached. Attempt to invalidate parent's cache before continuing.
+ self.dircache.pop(self._parent(path), None)
+ out = None
+
  if not out:
  try:
  r = self._send_to_api(
@@ -460,7 +488,7 @@ class DatabricksFile(AbstractBufferedFile):
  return return_buffer

  def _to_sized_blocks(self, length, start=0):
- """Helper function to split a range from 0 to total_length into bloksizes"""
+ """Helper function to split a range from 0 to total_length into blocksizes"""
  end = start + length
  for data_chunk in range(start, end, self.blocksize):
  data_start = data_chunk
fsspec/implementations/gist.py CHANGED
@@ -14,21 +14,21 @@ class GistFileSystem(AbstractFileSystem):

  Parameters
  ----------
- gist_id : str
+ gist_id: str
  The ID of the gist you want to access (the long hex value from the URL).
- filenames : list[str] (optional)
+ filenames: list[str] (optional)
  If provided, only make a file system representing these files, and do not fetch
  the list of all files for this gist.
- sha : str (optional)
+ sha: str (optional)
  If provided, fetch a particular revision of the gist. If omitted,
  the latest revision is used.
- username : str (optional)
- GitHub username for authentication (required if token is given).
- token : str (optional)
- GitHub personal access token (required if username is given).
- timeout : (float, float) or float, optional
+ username: str (optional)
+ GitHub username for authentication.
+ token: str (optional)
+ GitHub personal access token (required if username is given), or.
+ timeout: (float, float) or float, optional
  Connect and read timeouts for requests (default 60s each).
- kwargs : dict
+ kwargs: dict
  Stored on `self.request_kw` and passed to `requests.get` when fetching Gist
  metadata or reading ("opening") a file.
  """
@@ -51,10 +51,8 @@ class GistFileSystem(AbstractFileSystem):
  self.gist_id = gist_id
  self.filenames = filenames
  self.sha = sha # revision of the gist (optional)
- if (username is None) ^ (token is None):
- # Both or neither must be set
- if username or token:
- raise ValueError("Auth requires both username and token, or neither.")
+ if username is not None and token is None:
+ raise ValueError("User auth requires a token")
  self.username = username
  self.token = token
  self.request_kw = kwargs
@@ -67,9 +65,18 @@ class GistFileSystem(AbstractFileSystem):
  @property
  def kw(self):
  """Auth parameters passed to 'requests' if we have username/token."""
- if self.username is not None and self.token is not None:
- return {"auth": (self.username, self.token), **self.request_kw}
- return self.request_kw
+ kw = {
+ "headers": {
+ "Accept": "application/vnd.github+json",
+ "X-GitHub-Api-Version": "2022-11-28",
+ }
+ }
+ kw.update(self.request_kw)
+ if self.username and self.token:
+ kw["auth"] = (self.username, self.token)
+ elif self.token:
+ kw["headers"]["Authorization"] = f"Bearer {self.token}"
+ return kw

  def _fetch_gist_metadata(self):
  """
@@ -229,4 +236,6 @@ class GistFileSystem(AbstractFileSystem):
  pass # skip
  else:
  out[p] = e
+ if len(paths) == 1 and paths[0] == path:
+ return out[path]
  return out
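With the reworked kw property, a token can now be supplied without a username and is sent as a Bearer header instead of basic auth. A hedged example of constructing the filesystem either way; the gist ID and token values are placeholders:

    import fsspec

    # Token-only auth: sent as "Authorization: Bearer <token>".
    fs = fsspec.filesystem("gist", gist_id="0123456789abcdef", token="ghp_example")

    # Username plus token: sent as HTTP basic auth, as before.
    fs2 = fsspec.filesystem(
        "gist", gist_id="0123456789abcdef", username="octocat", token="ghp_example"
    )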
fsspec/implementations/http.py CHANGED
@@ -43,6 +43,7 @@ class HTTPFileSystem(AsyncFileSystem):
  HTML href tags will be used.
  """

+ protocol = ("http", "https")
  sep = "/"

  def __init__(
@@ -873,7 +874,7 @@ async def _file_info(url, session, size_policy="head", **kwargs):

  info["url"] = str(r.url)

- for checksum_field in ["ETag", "Content-MD5", "Digest"]:
+ for checksum_field in ["ETag", "Content-MD5", "Digest", "Last-Modified"]:
  if r.headers.get(checksum_field):
  info[checksum_field] = r.headers[checksum_field]

fsspec/implementations/jupyter.py CHANGED
@@ -42,7 +42,7 @@ class JupyterFileSystem(fsspec.AbstractFileSystem):
  path = self._strip_protocol(path)
  r = self.session.get(f"{self.url}/{path}")
  if r.status_code == 404:
- return FileNotFoundError(path)
+ raise FileNotFoundError(path)
  r.raise_for_status()
  out = r.json()

@@ -63,7 +63,7 @@ class JupyterFileSystem(fsspec.AbstractFileSystem):
  path = self._strip_protocol(path)
  r = self.session.get(f"{self.url}/{path}")
  if r.status_code == 404:
- return FileNotFoundError(path)
+ raise FileNotFoundError(path)
  r.raise_for_status()
  out = r.json()
  if out["format"] == "text":
@@ -98,6 +98,11 @@ class JupyterFileSystem(fsspec.AbstractFileSystem):
  }
  self.session.put(f"{self.url}/{path}", json=json)

+ def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs):
+ if path1 == path2:
+ return
+ self.session.patch(f"{self.url}/{path1}", json={"path": path2})
+
  def _rm(self, path):
  path = self._strip_protocol(path)
  self.session.delete(f"{self.url}/{path}")
fsspec/implementations/memory.py CHANGED
@@ -187,10 +187,10 @@ class MemoryFileSystem(AbstractFileSystem):
  parent = self._parent(parent)
  if self.isfile(parent):
  raise FileExistsError(parent)
- if mode in ["rb", "ab", "r+b"]:
+ if mode in ["rb", "ab", "r+b", "a+b"]:
  if path in self.store:
  f = self.store[path]
- if mode == "ab":
+ if "a" in mode:
  # position at the end of file
  f.seek(0, 2)
  else:
@@ -199,8 +199,8 @@ class MemoryFileSystem(AbstractFileSystem):
  return f
  else:
  raise FileNotFoundError(path)
- elif mode in {"wb", "xb"}:
- if mode == "xb" and self.exists(path):
+ elif mode in {"wb", "w+b", "xb", "x+b"}:
+ if "x" in mode and self.exists(path):
  raise FileExistsError
  m = MemoryFile(self, path, kwargs.get("data"))
  if not self._intrans:
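The widened mode checks mean MemoryFileSystem now accepts the read-write variants "w+b", "a+b" and "x+b" as well. A small sketch, with illustrative paths:

    import fsspec

    fs = fsspec.filesystem("memory")
    with fs.open("/notes.bin", "wb") as f:
        f.write(b"hello")

    # "a+b" now opens the existing buffer positioned at the end for appending.
    with fs.open("/notes.bin", "a+b") as f:
        f.write(b" world")

    print(fs.cat_file("/notes.bin"))  # expected: b"hello world"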
fsspec/implementations/reference.py CHANGED
@@ -22,7 +22,11 @@ from fsspec.asyn import AsyncFileSystem
  from fsspec.callbacks import DEFAULT_CALLBACK
  from fsspec.core import filesystem, open, split_protocol
  from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper
- from fsspec.utils import isfilelike, merge_offset_ranges, other_paths
+ from fsspec.utils import (
+ isfilelike,
+ merge_offset_ranges,
+ other_paths,
+ )

  logger = logging.getLogger("fsspec.reference")

@@ -698,13 +702,9 @@ class ReferenceFileSystem(AsyncFileSystem):
  **(ref_storage_args or target_options or {}), protocol=target_protocol
  )
  ref_fs, fo2 = fsspec.core.url_to_fs(fo, **dic)
- if ref_fs.isfile(fo2):
- # text JSON
- with fsspec.open(fo, "rb", **dic) as f:
- logger.info("Read reference from URL %s", fo)
- text = json.load(f)
- self._process_references(text, template_overrides)
- else:
+ if ".json" not in fo2 and (
+ fo.endswith(("parq", "parquet", "/")) or ref_fs.isdir(fo2)
+ ):
  # Lazy parquet refs
  logger.info("Open lazy reference dict from URL %s", fo)
  self.references = LazyReferenceMapper(
@@ -712,6 +712,12 @@ class ReferenceFileSystem(AsyncFileSystem):
  fs=ref_fs,
  cache_size=cache_size,
  )
+ else:
+ # text JSON
+ with fsspec.open(fo, "rb", **dic) as f:
+ logger.info("Read reference from URL %s", fo)
+ text = json.load(f)
+ self._process_references(text, template_overrides)
  else:
  # dictionaries
  self._process_references(fo, template_overrides)
fsspec/implementations/sftp.py CHANGED
@@ -66,6 +66,7 @@ class SFTPFileSystem(AbstractFileSystem):
  return out

  def mkdir(self, path, create_parents=True, mode=511):
+ path = self._strip_protocol(path)
  logger.debug("Creating folder %s", path)
  if self.exists(path):
  raise FileExistsError(f"File exists: {path}")
@@ -89,10 +90,12 @@ class SFTPFileSystem(AbstractFileSystem):
  self.ftp.mkdir(new_path, mode)

  def rmdir(self, path):
+ path = self._strip_protocol(path)
  logger.debug("Removing folder %s", path)
  self.ftp.rmdir(path)

  def info(self, path):
+ path = self._strip_protocol(path)
  stat = self._decode_stat(self.ftp.stat(path))
  stat["name"] = path
  return stat
@@ -123,6 +126,7 @@ class SFTPFileSystem(AbstractFileSystem):
  return out

  def ls(self, path, detail=False):
+ path = self._strip_protocol(path)
  logger.debug("Listing folder %s", path)
  stats = [self._decode_stat(stat, path) for stat in self.ftp.listdir_iter(path)]
  if detail:
@@ -132,6 +136,7 @@ class SFTPFileSystem(AbstractFileSystem):
  return sorted(paths)

  def put(self, lpath, rpath, callback=None, **kwargs):
+ rpath = self._strip_protocol(rpath)
  logger.debug("Put file %s into %s", lpath, rpath)
  self.ftp.put(lpath, rpath)

@@ -168,6 +173,8 @@ class SFTPFileSystem(AbstractFileSystem):
  self.ftp.remove(path)

  def mv(self, old, new):
+ new = self._strip_protocol(new)
+ old = self._strip_protocol(old)
  logger.debug("Renaming %s into %s", old, new)
  self.ftp.posix_rename(old, new)

fsspec/implementations/webhdfs.py CHANGED
@@ -268,7 +268,7 @@ class WebHDFS(AbstractFileSystem):
  info["name"] = path
  return self._process_info(info)

- def ls(self, path, detail=False):
+ def ls(self, path, detail=False, **kwargs):
  out = self._call("LISTSTATUS", path=path)
  infos = out.json()["FileStatuses"]["FileStatus"]
  for info in infos:
fsspec/spec.py CHANGED
@@ -67,6 +67,9 @@ class _Cached(type):
  extra_tokens = tuple(
  getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes
  )
+ strip_tokenize_options = {
+ k: kwargs.pop(k) for k in cls._strip_tokenize_options if k in kwargs
+ }
  token = tokenize(
  cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs
  )
@@ -78,7 +81,7 @@ class _Cached(type):
  cls._latest = token
  return cls._cache[token]
  else:
- obj = super().__call__(*args, **kwargs)
+ obj = super().__call__(*args, **kwargs, **strip_tokenize_options)
  # Setting _fs_token here causes some static linters to complain.
  obj._fs_token_ = token
  obj.storage_args = args
@@ -115,6 +118,8 @@ class AbstractFileSystem(metaclass=_Cached):

  #: Extra *class attributes* that should be considered when hashing.
  _extra_tokenize_attributes = ()
+ #: *storage options* that should not be considered when hashing.
+ _strip_tokenize_options = ()

  # Set by _Cached metaclass
  storage_args: tuple[Any, ...]
@@ -892,7 +897,7 @@ class AbstractFileSystem(metaclass=_Cached):
  dict of {path: contents} if there are multiple paths
  or the path has been otherwise expanded
  """
- paths = self.expand_path(path, recursive=recursive)
+ paths = self.expand_path(path, recursive=recursive, **kwargs)
  if (
  len(paths) > 1
  or isinstance(path, list)
@@ -972,7 +977,9 @@ class AbstractFileSystem(metaclass=_Cached):
  )

  source_is_str = isinstance(rpath, str)
- rpaths = self.expand_path(rpath, recursive=recursive, maxdepth=maxdepth)
+ rpaths = self.expand_path(
+ rpath, recursive=recursive, maxdepth=maxdepth, **kwargs
+ )
  if source_is_str and (not recursive or maxdepth is not None):
  # Non-recursive glob does not copy directories
  rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))]
@@ -1060,7 +1067,9 @@ class AbstractFileSystem(metaclass=_Cached):
  if source_is_str:
  lpath = make_path_posix(lpath)
  fs = LocalFileSystem()
- lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
+ lpaths = fs.expand_path(
+ lpath, recursive=recursive, maxdepth=maxdepth, **kwargs
+ )
  if source_is_str and (not recursive or maxdepth is not None):
  # Non-recursive glob does not copy directories
  lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
@@ -1131,7 +1140,9 @@ class AbstractFileSystem(metaclass=_Cached):
  from .implementations.local import trailing_sep

  source_is_str = isinstance(path1, str)
- paths1 = self.expand_path(path1, recursive=recursive, maxdepth=maxdepth)
+ paths1 = self.expand_path(
+ path1, recursive=recursive, maxdepth=maxdepth, **kwargs
+ )
  if source_is_str and (not recursive or maxdepth is not None):
  # Non-recursive glob does not copy directories
  paths1 = [p for p in paths1 if not (trailing_sep(p) or self.isdir(p))]
@@ -1172,7 +1183,7 @@ class AbstractFileSystem(metaclass=_Cached):
  raise ValueError("maxdepth must be at least 1")

  if isinstance(path, (str, os.PathLike)):
- out = self.expand_path([path], recursive, maxdepth)
+ out = self.expand_path([path], recursive, maxdepth, **kwargs)
  else:
  out = set()
  path = [self._strip_protocol(p) for p in path]
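The new _strip_tokenize_options hook lets a filesystem class exclude chosen storage options from the instance-cache token while still passing them to __init__. A hedged sketch of the intended effect for CachingFileSystem, which now strips "fo" (the target path supplied when the filesystem is part of a URL chain); the paths are placeholders:

    import fsspec

    fs_a = fsspec.filesystem("blockcache", target_protocol="memory", fo="/a")
    fs_b = fsspec.filesystem("blockcache", target_protocol="memory", fo="/b")

    # "fo" is no longer hashed into the cache token, so both calls are
    # expected to resolve to the same cached instance.
    same = fs_a is fs_b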
fsspec/utils.py CHANGED
@@ -438,6 +438,14 @@ def get_protocol(url: str) -> str:
  return "file"


+ def get_file_extension(url: str) -> str:
+ url = stringify_path(url)
+ ext_parts = url.rsplit(".", 1)
+ if len(ext_parts) > 1:
+ return ext_parts[-1]
+ return ""
+

  def can_be_local(path: str) -> bool:
  """Can the given URL be used with open_local?"""
  from fsspec import get_filesystem_class
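A brief illustration of the new helper's behaviour as written (a plain rsplit on the final dot):

    from fsspec.utils import get_file_extension

    get_file_extension("s3://bucket/data/refs.parquet")  # -> "parquet"
    get_file_extension("/tmp/archive.tar.gz")            # -> "gz" (last suffix only)
    get_file_extension("README")                         # -> ""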
fsspec-2025.7.0.dist-info/METADATA → fsspec-2025.10.0.dist-info/METADATA RENAMED
@@ -1,45 +1,16 @@
  Metadata-Version: 2.4
  Name: fsspec
- Version: 2025.7.0
+ Version: 2025.10.0
  Summary: File-system specification
  Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
  Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
  Project-URL: Homepage, https://github.com/fsspec/filesystem_spec
  Maintainer-email: Martin Durant <mdurant@anaconda.com>
- License: BSD 3-Clause License
-
- Copyright (c) 2018, Martin Durant
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
- * Neither the name of the copyright holder nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ License-Expression: BSD-3-Clause
  License-File: LICENSE
  Keywords: file
  Classifier: Development Status :: 4 - Beta
  Classifier: Intended Audience :: Developers
- Classifier: License :: OSI Approved :: BSD License
  Classifier: Operating System :: OS Independent
  Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
fsspec-2025.7.0.dist-info/RECORD → fsspec-2025.10.0.dist-info/RECORD RENAMED
@@ -1,50 +1,51 @@
  fsspec/__init__.py,sha256=L7qwNBU1iMNQd8Of87HYSNFT9gWlNMSESaJC8fY0AaQ,2053
- fsspec/_version.py,sha256=BxtkhBSbP2A9Z9pLCoNSk_l6NzaY9SDK9dmjDgIXO54,517
+ fsspec/_version.py,sha256=fXgQLiXV0scw4LTidVAhOWJj_BwnxWigALeToXadaR0,712
  fsspec/archive.py,sha256=vM6t_lgV6lBWbBYwpm3S4ofBQFQxUPr5KkDQrrQcQro,2411
  fsspec/asyn.py,sha256=mE55tO_MmGcxD14cUuaiS3veAqo0h6ZqANfnUuCN3sk,36365
  fsspec/caching.py,sha256=86uSgPa5E55b28XEhuC-dMcKAxJtZZnpQqnHTwaF3hI,34294
  fsspec/callbacks.py,sha256=BDIwLzK6rr_0V5ch557fSzsivCElpdqhXr5dZ9Te-EE,9210
  fsspec/compression.py,sha256=gBK2MV_oTFVW2XDq8bZVbYQKYrl6JDUou6_-kyvmxuk,5086
  fsspec/config.py,sha256=LF4Zmu1vhJW7Je9Q-cwkRc3xP7Rhyy7Xnwj26Z6sv2g,4279
- fsspec/conftest.py,sha256=fVfx-NLrH_OZS1TIpYNoPzM7efEcMoL62reHOdYeFCA,1245
- fsspec/core.py,sha256=1tLctwr7sF1VO3djc_UkjhJ8IAEy0TUMH_bb07Sw17E,23828
+ fsspec/conftest.py,sha256=uWfm_Qs5alPRxOhRpDfQ0-1jqSJ54pni4y96IxOREXM,3446
+ fsspec/core.py,sha256=ETQrATK6ZSkuIoy5-40N_NWUfMGx1KVSl5XGuJsaoYI,23829
  fsspec/dircache.py,sha256=YzogWJrhEastHU7vWz-cJiJ7sdtLXFXhEpInGKd4EcM,2717
  fsspec/exceptions.py,sha256=pauSLDMxzTJMOjvX1WEUK0cMyFkrFxpWJsyFywav7A8,331
  fsspec/fuse.py,sha256=Q-3NOOyLqBfYa4Db5E19z_ZY36zzYHtIs1mOUasItBQ,10177
- fsspec/generic.py,sha256=K-b03ifKidHUo99r8nz2pB6oGyf88RtTKahCuBF9ZVU,13409
+ fsspec/generic.py,sha256=9QHQYMNb-8w8-eYuIqShcTjO_LeHXFoQTyt8J5oEq5Q,13482
  fsspec/gui.py,sha256=CQ7QsrTpaDlWSLNOpwNoJc7khOcYXIZxmrAJN9bHWQU,14002
  fsspec/json.py,sha256=3BfNSQ96MB4Xao_ocjheINeqZM2ev7oljUzR5XmNXrE,3814
  fsspec/mapping.py,sha256=m2ndB_gtRBXYmNJg0Ie1-BVR75TFleHmIQBzC-yWhjU,8343
  fsspec/parquet.py,sha256=6ibAmG527L5JNFS0VO8BDNlxHdA3bVYqdByeiFgpUVM,19448
  fsspec/registry.py,sha256=epoYryFFzDWjbkQJfh6xkF3nEu8RTiOzV3-voi8Pshs,12048
- fsspec/spec.py,sha256=7cOUe5PC5Uyf56HtGBUHEoym8ktPj-BI8G4HR8Xd_C8,77298
+ fsspec/spec.py,sha256=Ym-Ust6LRjHgbhrmvNqwOBZxoVnaw3g3xHXMZGHx_xg,77692
  fsspec/transaction.py,sha256=xliRG6U2Zf3khG4xcw9WiB-yAoqJSHEGK_VjHOdtgo0,2398
- fsspec/utils.py,sha256=HC8RFbb7KpEDedsYxExvWvsTObEuUcuuWxd0B_MyGpo,22995
+ fsspec/utils.py,sha256=12npx0j-z0qpG9ko3laZ0us4KhjMe-2epmyocJB2ih8,23173
  fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- fsspec/implementations/arrow.py,sha256=721Dikne_lV_0tlgk9jyKmHL6W-5MT0h2LKGvOYQTPI,8623
- fsspec/implementations/asyn_wrapper.py,sha256=435NV_LyrRic3WvSxMWq7B8QGV_Ovzi-vYd2W1_1YtM,3326
+ fsspec/implementations/arrow.py,sha256=CVVyjNt9B_pRa5Ac1XIlhVLBs7vA5kCUZjsLPELb4d4,8758
+ fsspec/implementations/asyn_wrapper.py,sha256=fox9yjsEu7NCgzdAZJYfNALtUnFkIc_QmeKzaSllZho,3679
  fsspec/implementations/cache_mapper.py,sha256=W4wlxyPxZbSp9ItJ0pYRVBMh6bw9eFypgP6kUYuuiI4,2421
  fsspec/implementations/cache_metadata.py,sha256=rddh5-0SXIeyWCPpBpOFcaAyWoPyeYmFfeubEWt-nRM,8536
- fsspec/implementations/cached.py,sha256=59lyWvbzvX_yYC9cVASrktOdjmK6w-e7dNtNBJHaONQ,35103
+ fsspec/implementations/cached.py,sha256=d3IE33J5QA4QU_e43fPLF-dpbGszv6JM9mcFmHBI99o,35365
+ fsspec/implementations/chained.py,sha256=iGivpNaHUFjB_ea0-HAPhcmm6CL8qnDf270PSj7JwuE,680
  fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
  fsspec/implementations/data.py,sha256=LDLczxRh8h7x39Zjrd-GgzdQHr78yYxDlrv2C9Uxb5E,1658
- fsspec/implementations/dbfs.py,sha256=2Bp-0m9SqlaroDa0KbXxb5BobCyBJ7_5YQBISf3fxbQ,15145
+ fsspec/implementations/dbfs.py,sha256=1cvvC6KBWOb8pBVpc01xavVbEPXO1xsgZvPD7H73M9k,16217
  fsspec/implementations/dirfs.py,sha256=f1sGnQ9Vf0xTxrXo4jDeBy4Qfq3RTqAEemqBSeb0hwY,12108
  fsspec/implementations/ftp.py,sha256=bzL_TgH77nMMtTMewRGkbq4iObSHGu7YoMRCXBH4nrc,11639
- fsspec/implementations/gist.py,sha256=Ost985hmFr50KsA-QD0shY3hP4KX5qJ9rb5C-X4ehK8,8341
+ fsspec/implementations/gist.py,sha256=Y6jTDrE-wuTwvpPyAQDuuOMBGxlajafKWoB1_yX6jdY,8528
  fsspec/implementations/git.py,sha256=qBDWMz5LNllPqVjr5jf_1FuNha4P5lyQI3IlhYg-wUE,3731
  fsspec/implementations/github.py,sha256=aCsZL8UvXZgdkcB1RUs3DdLeNrjLKcFsFYeQFDWbBFo,11653
- fsspec/implementations/http.py,sha256=3LhYuRU3yw3v3tN8Oqz6EbJRl3ab2Sg_zsGOIv0E2gE,30418
+ fsspec/implementations/http.py,sha256=IxwekrxehP_l02yMiSHE7Xl1trCO5FzKTycH_iuHQG0,30468
  fsspec/implementations/http_sync.py,sha256=UydDqSdUBdhiJ1KufzV8rKGrTftFR4QmNV0safILb8g,30133
- fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ibN4Pg,3811
+ fsspec/implementations/jupyter.py,sha256=q1PlQ66AAswGFyr8MFKWyobaV2YekMWRtqENBDQtD28,4002
  fsspec/implementations/libarchive.py,sha256=5_I2DiLXwQ1JC8x-K7jXu-tBwhO9dj7tFLnb0bTnVMQ,7102
  fsspec/implementations/local.py,sha256=DQeK7jRGv4_mJAweLKALO5WzIIkjXxZ_jRvwQ_xadSA,16936
- fsspec/implementations/memory.py,sha256=Kc6TZSbZ4tdi-6cE5ttEPIgMyq9aAt6cDdVLFRTJvf8,10488
- fsspec/implementations/reference.py,sha256=npYj49AmR8rmON9t_BLpfEXqhgsardUeynamqyraOXo,48704
- fsspec/implementations/sftp.py,sha256=fMY9XZcmpjszQ2tCqO_TPaJesaeD_Dv7ptYzgUPGoO0,5631
+ fsspec/implementations/memory.py,sha256=TDdLtSPWXxZKrrVGwmc3uS3oK_2mlcVTk2BiqR8IeII,10507
+ fsspec/implementations/reference.py,sha256=3hr_CusIR1wBGo20MsKGoWCEnZJ626_QlHhRYobVYo0,48816
+ fsspec/implementations/sftp.py,sha256=L9pZOa6eLUWfJNtxkxeG2YI96SQwrM5Hj6ocyUZXUbg,5923
  fsspec/implementations/smb.py,sha256=5fhu8h06nOLBPh2c48aT7WBRqh9cEcbIwtyu06wTjec,15236
  fsspec/implementations/tar.py,sha256=dam78Tp_CozybNqCY2JYgGBS3Uc9FuJUAT9oB0lolOs,4111
- fsspec/implementations/webhdfs.py,sha256=G9wGywj7BkZk4Mu9zXu6HaDlEqX4F8Gw1i4k46CP_-o,16769
+ fsspec/implementations/webhdfs.py,sha256=PUgZM9HbVPAeW4u4B-rWl8wTcKKpPhwZO7xcINDmTNQ,16779
  fsspec/implementations/zip.py,sha256=9LBMHPft2OutJl2Ft-r9u_z3GptLkc2n91ur2A3bCbg,6072
  fsspec/tests/abstract/__init__.py,sha256=4xUJrv7gDgc85xAOz1p-V_K1hrsdMWTSa0rviALlJk8,10181
  fsspec/tests/abstract/common.py,sha256=1GQwNo5AONzAnzZj0fWgn8NJPLXALehbsuGxS3FzWVU,4973
@@ -54,7 +55,7 @@ fsspec/tests/abstract/mv.py,sha256=k8eUEBIrRrGMsBY5OOaDXdGnQUKGwDIfQyduB6YD3Ns,1
  fsspec/tests/abstract/open.py,sha256=Fi2PBPYLbRqysF8cFm0rwnB41kMdQVYjq8cGyDXp3BU,329
  fsspec/tests/abstract/pipe.py,sha256=LFzIrLCB5GLXf9rzFKJmE8AdG7LQ_h4bJo70r8FLPqM,402
  fsspec/tests/abstract/put.py,sha256=7aih17OKB_IZZh1Mkq1eBDIjobhtMQmI8x-Pw-S_aZk,21201
- fsspec-2025.7.0.dist-info/METADATA,sha256=fPEhTbN6wi6KOz3IkiJQcOTYvQXVEjzTv9gzyX-KRHI,12161
- fsspec-2025.7.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- fsspec-2025.7.0.dist-info/licenses/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
- fsspec-2025.7.0.dist-info/RECORD,,
+ fsspec-2025.10.0.dist-info/METADATA,sha256=M950PL-JM4aP_1zCRiebQ-lOaUqy9_4kra0-dqA8tCI,10398
+ fsspec-2025.10.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ fsspec-2025.10.0.dist-info/licenses/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
+ fsspec-2025.10.0.dist-info/RECORD,,