fsspec 2025.9.0__py3-none-any.whl → 2025.12.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, exactly as they appear in their public registry. It is provided for informational purposes only.
fsspec/implementations/jupyter.py CHANGED
@@ -42,7 +42,7 @@ class JupyterFileSystem(fsspec.AbstractFileSystem):
         path = self._strip_protocol(path)
         r = self.session.get(f"{self.url}/{path}")
         if r.status_code == 404:
-            return FileNotFoundError(path)
+            raise FileNotFoundError(path)
         r.raise_for_status()
         out = r.json()

@@ -63,7 +63,7 @@ class JupyterFileSystem(fsspec.AbstractFileSystem):
         path = self._strip_protocol(path)
         r = self.session.get(f"{self.url}/{path}")
         if r.status_code == 404:
-            return FileNotFoundError(path)
+            raise FileNotFoundError(path)
         r.raise_for_status()
         out = r.json()
         if out["format"] == "text":
@@ -98,6 +98,11 @@ class JupyterFileSystem(fsspec.AbstractFileSystem):
         }
         self.session.put(f"{self.url}/{path}", json=json)

+    def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs):
+        if path1 == path2:
+            return
+        self.session.patch(f"{self.url}/{path1}", json={"path": path2})
+
     def _rm(self, path):
         path = self._strip_protocol(path)
         self.session.delete(f"{self.url}/{path}")
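`JupyterFileSystem` now raises `FileNotFoundError` on a 404 instead of returning the exception object, and gains a native `mv` that renames server-side with a single PATCH to the contents API rather than the copy-then-delete fallback inherited from `AbstractFileSystem`. A minimal usage sketch (server URL, token and paths below are hypothetical):

```python
import fsspec

# Connect to a running Jupyter server and rename a file in place.
fs = fsspec.filesystem("jupyter", url="http://localhost:8888", tok="my-token")
fs.mv("notebooks/old.ipynb", "notebooks/new.ipynb")
```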
fsspec/implementations/libarchive.py CHANGED
@@ -195,7 +195,7 @@ class LibArchiveFileSystem(AbstractArchiveFileSystem):
         if mode != "rb":
             raise NotImplementedError

-        data = bytes()
+        data = b""
         with self._open_archive() as arc:
             for entry in arc:
                 if entry.pathname != path:
fsspec/implementations/memory.py CHANGED
@@ -187,10 +187,10 @@ class MemoryFileSystem(AbstractFileSystem):
             parent = self._parent(parent)
             if self.isfile(parent):
                 raise FileExistsError(parent)
-        if mode in ["rb", "ab", "r+b"]:
+        if mode in ["rb", "ab", "r+b", "a+b"]:
             if path in self.store:
                 f = self.store[path]
-                if mode == "ab":
+                if "a" in mode:
                     # position at the end of file
                     f.seek(0, 2)
                 else:
@@ -199,8 +199,8 @@ class MemoryFileSystem(AbstractFileSystem):
                 return f
             else:
                 raise FileNotFoundError(path)
-        elif mode in {"wb", "xb"}:
-            if mode == "xb" and self.exists(path):
+        elif mode in {"wb", "w+b", "xb", "x+b"}:
+            if "x" in mode and self.exists(path):
                 raise FileExistsError
             m = MemoryFile(self, path, kwargs.get("data"))
             if not self._intrans:
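`MemoryFileSystem._open` now also accepts the update modes `"a+b"`, `"w+b"` and `"x+b"`, and any mode containing `"a"` seeks to the end of an existing file before returning it. A minimal sketch of the newly supported append-update mode:

```python
import fsspec

fs = fsspec.filesystem("memory")
with fs.open("/demo.bin", "wb") as f:
    f.write(b"hello")
# "a+b" is now routed like "ab": the existing buffer is opened positioned at its end.
with fs.open("/demo.bin", "a+b") as f:
    f.write(b" world")
print(fs.cat("/demo.bin"))  # b"hello world"
```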
fsspec/implementations/reference.py CHANGED
@@ -22,7 +22,11 @@ from fsspec.asyn import AsyncFileSystem
 from fsspec.callbacks import DEFAULT_CALLBACK
 from fsspec.core import filesystem, open, split_protocol
 from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper
-from fsspec.utils import isfilelike, merge_offset_ranges, other_paths
+from fsspec.utils import (
+    isfilelike,
+    merge_offset_ranges,
+    other_paths,
+)

 logger = logging.getLogger("fsspec.reference")

@@ -215,7 +219,7 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
         fs.pipe("/".join([root, ".zmetadata"]), json.dumps(met).encode())
         return LazyReferenceMapper(root, fs, **kwargs)

-    @lru_cache()
+    @lru_cache
     def listdir(self):
         """List top-level directories"""
         dirs = (p.rsplit("/", 1)[0] for p in self.zmetadata if not p.startswith(".z"))
@@ -698,13 +702,9 @@ class ReferenceFileSystem(AsyncFileSystem):
                 **(ref_storage_args or target_options or {}), protocol=target_protocol
             )
             ref_fs, fo2 = fsspec.core.url_to_fs(fo, **dic)
-            if ref_fs.isfile(fo2):
-                # text JSON
-                with fsspec.open(fo, "rb", **dic) as f:
-                    logger.info("Read reference from URL %s", fo)
-                    text = json.load(f)
-                self._process_references(text, template_overrides)
-            else:
+            if ".json" not in fo2 and (
+                fo.endswith(("parq", "parquet", "/")) or ref_fs.isdir(fo2)
+            ):
                 # Lazy parquet refs
                 logger.info("Open lazy reference dict from URL %s", fo)
                 self.references = LazyReferenceMapper(
@@ -712,6 +712,12 @@ class ReferenceFileSystem(AsyncFileSystem):
                     fs=ref_fs,
                     cache_size=cache_size,
                 )
+            else:
+                # text JSON
+                with fsspec.open(fo, "rb", **dic) as f:
+                    logger.info("Read reference from URL %s", fo)
+                    text = json.load(f)
+                self._process_references(text, template_overrides)
         else:
             # dictionaries
             self._process_references(fo, template_overrides)
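With the two hunks above, the constructor picks the reference format from the target itself: a `fo` that is not a `.json` URL and that either ends in `parq`/`parquet`/`/` or resolves to a directory is opened lazily through `LazyReferenceMapper`; anything else falls back to reading plain JSON references. A rough sketch (both paths are hypothetical):

```python
import fsspec

# Plain JSON reference set: read eagerly and processed in memory.
fs = fsspec.filesystem("reference", fo="references.json")

# Directory of parquet-encoded references (note the trailing "/"): opened lazily.
fs = fsspec.filesystem("reference", fo="combined_refs/")
```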
fsspec/implementations/sftp.py CHANGED
@@ -66,6 +66,7 @@ class SFTPFileSystem(AbstractFileSystem):
         return out

     def mkdir(self, path, create_parents=True, mode=511):
+        path = self._strip_protocol(path)
         logger.debug("Creating folder %s", path)
         if self.exists(path):
             raise FileExistsError(f"File exists: {path}")
@@ -89,10 +90,12 @@ class SFTPFileSystem(AbstractFileSystem):
                 self.ftp.mkdir(new_path, mode)

     def rmdir(self, path):
+        path = self._strip_protocol(path)
         logger.debug("Removing folder %s", path)
         self.ftp.rmdir(path)

     def info(self, path):
+        path = self._strip_protocol(path)
         stat = self._decode_stat(self.ftp.stat(path))
         stat["name"] = path
         return stat
@@ -123,6 +126,7 @@ class SFTPFileSystem(AbstractFileSystem):
         return out

     def ls(self, path, detail=False):
+        path = self._strip_protocol(path)
         logger.debug("Listing folder %s", path)
         stats = [self._decode_stat(stat, path) for stat in self.ftp.listdir_iter(path)]
         if detail:
@@ -132,6 +136,7 @@ class SFTPFileSystem(AbstractFileSystem):
         return sorted(paths)

     def put(self, lpath, rpath, callback=None, **kwargs):
+        rpath = self._strip_protocol(rpath)
         logger.debug("Put file %s into %s", lpath, rpath)
         self.ftp.put(lpath, rpath)

@@ -168,6 +173,8 @@ class SFTPFileSystem(AbstractFileSystem):
         self.ftp.remove(path)

     def mv(self, old, new):
+        new = self._strip_protocol(new)
+        old = self._strip_protocol(old)
         logger.debug("Renaming %s into %s", old, new)
         self.ftp.posix_rename(old, new)

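The SFTP changes above all apply `_strip_protocol` before touching paramiko, so fully qualified `sftp://` URLs can now be passed straight to `mkdir`, `rmdir`, `info`, `ls`, `put` and `mv`. A hedged sketch (host and credentials are hypothetical):

```python
import fsspec

fs = fsspec.filesystem("sftp", host="example.com", username="user", password="secret")
# Full URLs are reduced to plain remote paths before the SFTP calls are issued.
fs.mkdir("sftp://example.com/tmp/newdir")
fs.mv("sftp://example.com/tmp/a.txt", "sftp://example.com/tmp/b.txt")
```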
fsspec/implementations/webhdfs.py CHANGED
@@ -268,7 +268,7 @@ class WebHDFS(AbstractFileSystem):
         info["name"] = path
         return self._process_info(info)

-    def ls(self, path, detail=False):
+    def ls(self, path, detail=False, **kwargs):
         out = self._call("LISTSTATUS", path=path)
         infos = out.json()["FileStatuses"]["FileStatus"]
         for info in infos:
fsspec/json.py CHANGED
@@ -1,13 +1,8 @@
 import json
-from collections.abc import Mapping, Sequence
+from collections.abc import Callable, Mapping, Sequence
 from contextlib import suppress
 from pathlib import PurePath
-from typing import (
-    Any,
-    Callable,
-    ClassVar,
-    Optional,
-)
+from typing import Any, ClassVar

 from .registry import _import_class, get_filesystem_class
 from .spec import AbstractFileSystem
@@ -45,12 +40,12 @@ class FilesystemJSONDecoder(json.JSONDecoder):
     def __init__(
         self,
         *,
-        object_hook: Optional[Callable[[dict[str, Any]], Any]] = None,
-        parse_float: Optional[Callable[[str], Any]] = None,
-        parse_int: Optional[Callable[[str], Any]] = None,
-        parse_constant: Optional[Callable[[str], Any]] = None,
+        object_hook: Callable[[dict[str, Any]], Any] | None = None,
+        parse_float: Callable[[str], Any] | None = None,
+        parse_int: Callable[[str], Any] | None = None,
+        parse_constant: Callable[[str], Any] | None = None,
         strict: bool = True,
-        object_pairs_hook: Optional[Callable[[list[tuple[str, Any]]], Any]] = None,
+        object_pairs_hook: Callable[[list[tuple[str, Any]]], Any] | None = None,
     ) -> None:
         self.original_object_hook = object_hook

fsspec/parquet.py CHANGED
@@ -1,8 +1,12 @@
 import io
 import json
 import warnings
+from typing import Literal
+
+import fsspec

 from .core import url_to_fs
+from .spec import AbstractBufferedFile
 from .utils import merge_offset_ranges

 # Parquet-Specific Utilities for fsspec
@@ -14,19 +18,24 @@ from .utils import merge_offset_ranges
 # on remote file systems.


-def open_parquet_file(
-    path,
-    mode="rb",
-    fs=None,
+class AlreadyBufferedFile(AbstractBufferedFile):
+    def _fetch_range(self, start, end):
+        raise NotImplementedError
+
+
+def open_parquet_files(
+    path: list[str],
+    mode: Literal["rb"] = "rb",
+    fs: None | fsspec.AbstractFileSystem = None,
     metadata=None,
-    columns=None,
-    row_groups=None,
-    storage_options=None,
-    strict=False,
-    engine="auto",
-    max_gap=64_000,
-    max_block=256_000_000,
-    footer_sample_size=1_000_000,
+    columns: None | list[str] = None,
+    row_groups: None | list[int] = None,
+    storage_options: None | dict = None,
+    engine: str = "auto",
+    max_gap: int = 64_000,
+    max_block: int = 256_000_000,
+    footer_sample_size: int = 1_000_000,
+    filters: None | list[list[list[str]]] = None,
     **kwargs,
 ):
     """
@@ -72,12 +81,6 @@ def open_parquet_file(
     storage_options : dict, optional
         Used to generate an `AbstractFileSystem` object if `fs` was
         not specified.
-    strict : bool, optional
-        Whether the resulting `KnownPartsOfAFile` cache should
-        fetch reads that go beyond a known byte-range boundary.
-        If `False` (the default), any read that ends outside a
-        known part will be zero padded. Note that using
-        `strict=True` may be useful for debugging.
     max_gap : int, optional
         Neighboring byte ranges will only be merged when their
         inter-range gap is <= `max_gap`. Default is 64KB.
@@ -89,6 +92,10 @@ def open_parquet_file(
         for the footer metadata. If the sampled bytes do not contain
         the footer, a second read request will be required, and
         performance will suffer. Default is 1MB.
+    filters : list[list], optional
+        List of filters to apply to prevent reading row groups, of the
+        same format as accepted by the loading engines. Ignored if
+        ``row_groups`` is specified.
     **kwargs :
         Optional key-word arguments to pass to `fs.open`
     """
@@ -96,20 +103,36 @@ def open_parquet_file(
     # Make sure we have an `AbstractFileSystem` object
     # to work with
     if fs is None:
-        fs = url_to_fs(path, **(storage_options or {}))[0]
+        path0 = path
+        if isinstance(path, (list, tuple)):
+            path = path[0]
+        fs, path = url_to_fs(path, **(storage_options or {}))
+    else:
+        path0 = path

-    # For now, `columns == []` not supported. Just use
-    # default `open` command with `path` input
+    # For now, `columns == []` not supported, is the same
+    # as all columns
     if columns is not None and len(columns) == 0:
-        return fs.open(path, mode=mode)
+        columns = None

     # Set the engine
     engine = _set_engine(engine)

-    # Fetch the known byte ranges needed to read
-    # `columns` and/or `row_groups`
+    if isinstance(path0, (list, tuple)):
+        paths = path0
+    elif "*" in path:
+        paths = fs.glob(path)
+    elif path0.endswith("/"):  # or fs.isdir(path):
+        paths = [
+            _
+            for _ in fs.find(path, withdirs=False, detail=False)
+            if _.endswith((".parquet", ".parq"))
+        ]
+    else:
+        paths = [path]
+
     data = _get_parquet_byte_ranges(
-        [path],
+        paths,
         fs,
         metadata=metadata,
         columns=columns,
@@ -118,24 +141,37 @@ def open_parquet_file(
         max_gap=max_gap,
         max_block=max_block,
         footer_sample_size=footer_sample_size,
+        filters=filters,
     )

-    # Extract file name from `data`
-    fn = next(iter(data)) if data else path
-
     # Call self.open with "parts" caching
     options = kwargs.pop("cache_options", {}).copy()
-    return fs.open(
-        fn,
-        mode=mode,
-        cache_type="parts",
-        cache_options={
-            **options,
-            "data": data.get(fn, {}),
-            "strict": strict,
-        },
-        **kwargs,
-    )
+    return [
+        AlreadyBufferedFile(
+            fs=None,
+            path=fn,
+            mode=mode,
+            cache_type="parts",
+            cache_options={
+                **options,
+                "data": data.get(fn, {}),
+            },
+            size=max(_[1] for _ in data.get(fn, {})),
+            **kwargs,
+        )
+        for fn in data
+    ]
+
+
+def open_parquet_file(*args, **kwargs):
+    """Create files tailored to reading specific parts of parquet files
+
+    Please see ``open_parquet_files`` for details of the arguments. The
+    difference is, this function always returns a single ``AlreadyBufferedFile``,
+    whereas ``open_parquet_files`` always returns a list of files, even if
+    there are one or zero matching parquet files.
+    """
+    return open_parquet_files(*args, **kwargs)[0]


 def _get_parquet_byte_ranges(
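Taken together, these hunks turn the single-file helper into a multi-file API: `open_parquet_files` expands an explicit list, a glob, or a directory (trailing `/`) into one `AlreadyBufferedFile` per parquet file, each pre-loaded with exactly the byte ranges needed for the requested columns, row groups or filters, while `open_parquet_file` keeps its old call signature by returning the first entry of that list. A hedged usage sketch (the URL, column names and filter values are hypothetical; per the engine changes below, `filters` currently requires fastparquet):

```python
from fsspec.parquet import open_parquet_file, open_parquet_files

# One buffered file per parquet file found under the directory.
files = open_parquet_files(
    "s3://bucket/dataset/",            # trailing "/" triggers directory discovery
    columns=["a", "b"],
    filters=[[("year", "==", 2024)]],  # engine-style row-group filters
    engine="fastparquet",
    storage_options={"anon": True},
)

# Backwards-compatible single-file form.
f = open_parquet_file("s3://bucket/dataset/part-0.parquet", columns=["a"])
```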
@@ -148,6 +184,7 @@ def _get_parquet_byte_ranges(
     max_block=256_000_000,
     footer_sample_size=1_000_000,
     engine="auto",
+    filters=None,
 ):
     """Get a dictionary of the known byte ranges needed
     to read a specific column/row-group selection from a
@@ -172,6 +209,7 @@ def _get_parquet_byte_ranges(
             row_groups=row_groups,
             max_gap=max_gap,
             max_block=max_block,
+            filters=filters,
         )

     # Get file sizes asynchronously
@@ -183,17 +221,16 @@ def _get_parquet_byte_ranges(
     data_starts = []
     data_ends = []
     add_header_magic = True
-    if columns is None and row_groups is None:
+    if columns is None and row_groups is None and filters is None:
         # We are NOT selecting specific columns or row-groups.
         #
         # We can avoid sampling the footers, and just transfer
         # all file data with cat_ranges
         for i, path in enumerate(paths):
             result[path] = {}
-            for b in range(0, file_sizes[i], max_block):
-                data_paths.append(path)
-                data_starts.append(b)
-                data_ends.append(min(b + max_block, file_sizes[i]))
+            data_paths.append(path)
+            data_starts.append(0)
+            data_ends.append(file_sizes[i])
         add_header_magic = False  # "Magic" should already be included
     else:
         # We ARE selecting specific columns or row-groups.
@@ -235,29 +272,21 @@ def _get_parquet_byte_ranges(

     # Calculate required byte ranges for each path
     for i, path in enumerate(paths):
-        # Deal with small-file case.
-        # Just include all remaining bytes of the file
-        # in a single range.
-        if file_sizes[i] < max_block:
-            if footer_starts[i] > 0:
-                # Only need to transfer the data if the
-                # footer sample isn't already the whole file
-                data_paths.append(path)
-                data_starts.append(0)
-                data_ends.append(footer_starts[i])
-            continue
-
         # Use "engine" to collect data byte ranges
         path_data_starts, path_data_ends = engine._parquet_byte_ranges(
             columns,
             row_groups=row_groups,
             footer=footer_samples[i],
             footer_start=footer_starts[i],
+            filters=filters,
         )

         data_paths += [path] * len(path_data_starts)
         data_starts += path_data_starts
         data_ends += path_data_ends
+        result.setdefault(path, {})[(footer_starts[i], file_sizes[i])] = (
+            footer_samples[i]
+        )

     # Merge adjacent offset ranges
     data_paths, data_starts, data_ends = merge_offset_ranges(
@@ -291,6 +320,7 @@ def _get_parquet_byte_ranges_from_metadata(
     row_groups=None,
     max_gap=64_000,
     max_block=256_000_000,
+    filters=None,
 ):
     """Simplified version of `_get_parquet_byte_ranges` for
     the case that an engine-specific `metadata` object is
@@ -300,9 +330,7 @@ def _get_parquet_byte_ranges_from_metadata(

     # Use "engine" to collect data byte ranges
     data_paths, data_starts, data_ends = engine._parquet_byte_ranges(
-        columns,
-        row_groups=row_groups,
-        metadata=metadata,
+        columns, row_groups=row_groups, metadata=metadata, filters=filters
     )

     # Merge adjacent offset ranges
@@ -401,16 +429,19 @@ class FastparquetEngine:
         metadata=None,
         footer=None,
         footer_start=None,
+        filters=None,
     ):
         # Initialize offset ranges and define ParqetFile metadata
         pf = metadata
         data_paths, data_starts, data_ends = [], [], []
+        if filters and row_groups:
+            raise ValueError("filters and row_groups cannot be used together")
         if pf is None:
             pf = self.fp.ParquetFile(io.BytesIO(footer))

         # Convert columns to a set and add any index columns
         # specified in the pandas metadata (just in case)
-        column_set = None if columns is None else set(columns)
+        column_set = None if columns is None else {c.split(".", 1)[0] for c in columns}
         if column_set is not None and hasattr(pf, "pandas_metadata"):
             md_index = [
                 ind
@@ -422,7 +453,12 @@ class FastparquetEngine:

         # Check if row_groups is a list of integers
         # or a list of row-group metadata
-        if row_groups and not isinstance(row_groups[0], int):
+        if filters:
+            from fastparquet.api import filter_row_groups
+
+            row_group_indices = None
+            row_groups = filter_row_groups(pf, filters)
+        elif row_groups and not isinstance(row_groups[0], int):
             # Input row_groups contains row-group metadata
             row_group_indices = None
         else:
@@ -486,9 +522,12 @@ class PyarrowEngine:
         metadata=None,
         footer=None,
         footer_start=None,
+        filters=None,
     ):
         if metadata is not None:
             raise ValueError("metadata input not supported for PyarrowEngine")
+        if filters:
+            raise NotImplementedError

         data_starts, data_ends = [], []
         md = self.pq.ParquetFile(io.BytesIO(footer)).metadata
fsspec/registry.py CHANGED
@@ -72,6 +72,9 @@ known_implementations = {
         "class": "fsspec.implementations.arrow.HadoopFileSystem",
         "err": "pyarrow and local java libraries required for HDFS",
     },
+    "async_wrapper": {
+        "class": "fsspec.implementations.asyn_wrapper.AsyncFileSystemWrapper",
+    },
     "asynclocal": {
         "class": "morefs.asyn_local.AsyncLocalFileSystem",
         "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
fsspec/spec.py CHANGED
@@ -67,6 +67,9 @@ class _Cached(type):
         extra_tokens = tuple(
             getattr(cls, attr, None) for attr in cls._extra_tokenize_attributes
         )
+        strip_tokenize_options = {
+            k: kwargs.pop(k) for k in cls._strip_tokenize_options if k in kwargs
+        }
         token = tokenize(
             cls, cls._pid, threading.get_ident(), *args, *extra_tokens, **kwargs
         )
@@ -78,7 +81,7 @@ class _Cached(type):
             cls._latest = token
             return cls._cache[token]
         else:
-            obj = super().__call__(*args, **kwargs)
+            obj = super().__call__(*args, **kwargs, **strip_tokenize_options)
             # Setting _fs_token here causes some static linters to complain.
             obj._fs_token_ = token
             obj.storage_args = args
@@ -115,6 +118,8 @@ class AbstractFileSystem(metaclass=_Cached):

     #: Extra *class attributes* that should be considered when hashing.
     _extra_tokenize_attributes = ()
+    #: *storage options* that should not be considered when hashing.
+    _strip_tokenize_options = ()

     # Set by _Cached metaclass
     storage_args: tuple[Any, ...]
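The new `_strip_tokenize_options` hook lets a filesystem exclude chosen storage options from the instance-cache token: the metaclass pops them before hashing and passes them back to the constructor unchanged. A hedged sketch with a hypothetical `verbose` option:

```python
from fsspec.spec import AbstractFileSystem

class DemoFileSystem(AbstractFileSystem):
    protocol = "demo"
    # "verbose" no longer influences hashing, so instances differing only in
    # verbose=True/False resolve to the same cached object.
    _strip_tokenize_options = ("verbose",)

    def __init__(self, verbose=False, **kwargs):
        super().__init__(**kwargs)
        self.verbose = verbose
```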
@@ -892,7 +897,7 @@ class AbstractFileSystem(metaclass=_Cached):
         dict of {path: contents} if there are multiple paths
         or the path has been otherwise expanded
         """
-        paths = self.expand_path(path, recursive=recursive)
+        paths = self.expand_path(path, recursive=recursive, **kwargs)
         if (
             len(paths) > 1
             or isinstance(path, list)
@@ -972,7 +977,9 @@ class AbstractFileSystem(metaclass=_Cached):
         )

         source_is_str = isinstance(rpath, str)
-        rpaths = self.expand_path(rpath, recursive=recursive, maxdepth=maxdepth)
+        rpaths = self.expand_path(
+            rpath, recursive=recursive, maxdepth=maxdepth, **kwargs
+        )
         if source_is_str and (not recursive or maxdepth is not None):
             # Non-recursive glob does not copy directories
             rpaths = [p for p in rpaths if not (trailing_sep(p) or self.isdir(p))]
@@ -1060,7 +1067,9 @@ class AbstractFileSystem(metaclass=_Cached):
         if source_is_str:
             lpath = make_path_posix(lpath)
         fs = LocalFileSystem()
-        lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
+        lpaths = fs.expand_path(
+            lpath, recursive=recursive, maxdepth=maxdepth, **kwargs
+        )
         if source_is_str and (not recursive or maxdepth is not None):
             # Non-recursive glob does not copy directories
             lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
@@ -1131,7 +1140,9 @@ class AbstractFileSystem(metaclass=_Cached):
         from .implementations.local import trailing_sep

         source_is_str = isinstance(path1, str)
-        paths1 = self.expand_path(path1, recursive=recursive, maxdepth=maxdepth)
+        paths1 = self.expand_path(
+            path1, recursive=recursive, maxdepth=maxdepth, **kwargs
+        )
         if source_is_str and (not recursive or maxdepth is not None):
             # Non-recursive glob does not copy directories
             paths1 = [p for p in paths1 if not (trailing_sep(p) or self.isdir(p))]
@@ -1172,7 +1183,7 @@ class AbstractFileSystem(metaclass=_Cached):
             raise ValueError("maxdepth must be at least 1")

         if isinstance(path, (str, os.PathLike)):
-            out = self.expand_path([path], recursive, maxdepth)
+            out = self.expand_path([path], recursive, maxdepth, **kwargs)
         else:
             out = set()
             path = [self._strip_protocol(p) for p in path]
fsspec/utils.py CHANGED
@@ -7,23 +7,16 @@ import os
 import re
 import sys
 import tempfile
-from collections.abc import Iterable, Iterator, Sequence
+from collections.abc import Callable, Iterable, Iterator, Sequence
 from functools import partial
 from hashlib import md5
 from importlib.metadata import version
-from typing import (
-    IO,
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    TypeVar,
-)
+from typing import IO, TYPE_CHECKING, Any, TypeVar
 from urllib.parse import urlsplit

 if TYPE_CHECKING:
     import pathlib
-
-    from typing_extensions import TypeGuard
+    from typing import TypeGuard

 from fsspec.spec import AbstractFileSystem

@@ -438,6 +431,14 @@ def get_protocol(url: str) -> str:
     return "file"


+def get_file_extension(url: str) -> str:
+    url = stringify_path(url)
+    ext_parts = url.rsplit(".", 1)
+    if len(ext_parts) > 1:
+        return ext_parts[-1]
+    return ""
+
+
 def can_be_local(path: str) -> bool:
     """Can the given URL be used with open_local?"""
     from fsspec import get_filesystem_class
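`get_file_extension` is a small new helper: it returns whatever follows the final `.` in a path (after `stringify_path`), or an empty string when there is no dot:

```python
from fsspec.utils import get_file_extension

get_file_extension("data/archive.tar.gz")  # -> "gz"
get_file_extension("README")               # -> ""
```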
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fsspec
-Version: 2025.9.0
+Version: 2025.12.0
 Summary: File-system specification
 Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
 Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
@@ -12,12 +12,12 @@ Keywords: file
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
 Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
-Requires-Python: >=3.9
+Classifier: Programming Language :: Python :: 3.14
+Requires-Python: >=3.10
 Provides-Extra: abfs
 Requires-Dist: adlfs; extra == 'abfs'
 Provides-Extra: adl
@@ -197,7 +197,7 @@ CI runtime. For local use, pick a version suitable for you.

 ```bash
 # For a new environment (mamba / conda).
-mamba create -n fsspec -c conda-forge python=3.9 -y
+mamba create -n fsspec -c conda-forge python=3.10 -y
 conda activate fsspec

 # Standard dev install with docs and tests.