fsspec 2025.10.0.tar.gz → 2025.12.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fsspec-2025.10.0 → fsspec-2025.12.0}/.github/workflows/main.yaml +6 -6
- {fsspec-2025.10.0 → fsspec-2025.12.0}/.github/workflows/pypipublish.yaml +2 -2
- {fsspec-2025.10.0 → fsspec-2025.12.0}/.pre-commit-config.yaml +1 -2
- {fsspec-2025.10.0 → fsspec-2025.12.0}/PKG-INFO +4 -4
- {fsspec-2025.10.0 → fsspec-2025.12.0}/README.md +1 -1
- {fsspec-2025.10.0 → fsspec-2025.12.0}/docs/environment.yml +1 -1
- {fsspec-2025.10.0 → fsspec-2025.12.0}/docs/source/changelog.rst +20 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/_version.py +2 -2
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/asyn.py +7 -1
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/caching.py +52 -45
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/core.py +20 -3
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/arrow.py +6 -3
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/asyn_wrapper.py +3 -1
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/cache_metadata.py +1 -3
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/cached.py +2 -1
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/data.py +1 -2
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/dirfs.py +2 -1
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/http.py +7 -1
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/http_sync.py +7 -1
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/libarchive.py +1 -1
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/reference.py +1 -1
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/json.py +7 -12
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/parquet.py +100 -61
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/registry.py +3 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/utils.py +3 -10
- {fsspec-2025.10.0 → fsspec-2025.12.0}/pyproject.toml +4 -4
- {fsspec-2025.10.0 → fsspec-2025.12.0}/.codespellrc +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/.coveragerc +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/.gitattributes +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/.gitignore +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/LICENSE +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/ci/environment-downstream.yml +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/ci/environment-friends.yml +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/ci/environment-linux.yml +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/ci/environment-win.yml +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/docs/Makefile +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/docs/README.md +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/docs/make.bat +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/docs/source/_static/custom.css +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/docs/source/api.rst +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/docs/source/async.rst +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/docs/source/code-of-conduct.rst +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/docs/source/conf.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/docs/source/copying.rst +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/docs/source/developer.rst +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/docs/source/features.rst +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/docs/source/img/gui.png +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/docs/source/index.rst +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/docs/source/intro.rst +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/docs/source/usage.rst +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/__init__.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/archive.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/callbacks.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/compression.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/config.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/conftest.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/dircache.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/exceptions.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/fuse.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/generic.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/gui.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/__init__.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/cache_mapper.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/chained.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/dask.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/dbfs.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/ftp.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/gist.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/git.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/github.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/jupyter.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/local.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/memory.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/sftp.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/smb.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/tar.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/webhdfs.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/implementations/zip.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/mapping.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/spec.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/tests/abstract/__init__.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/tests/abstract/common.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/tests/abstract/copy.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/tests/abstract/get.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/tests/abstract/mv.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/tests/abstract/open.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/tests/abstract/pipe.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/tests/abstract/put.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/fsspec/transaction.py +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/install_s3fs.sh +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/readthedocs.yml +0 -0
- {fsspec-2025.10.0 → fsspec-2025.12.0}/setup.cfg +0 -0
--- fsspec-2025.10.0/.github/workflows/main.yaml
+++ fsspec-2025.12.0/.github/workflows/main.yaml
@@ -14,18 +14,18 @@ jobs:
       fail-fast: false
       matrix:
         PY:
-          - "3.9"
           - "3.10"
           - "3.11"
           - "3.12"
           - "3.13"
+          - "3.14"
 
     env:
       CIRUN: true
 
     steps:
       - name: Checkout
-        uses: actions/checkout@
+        uses: actions/checkout@v5
         with:
           fetch-depth: 0
 
@@ -50,7 +50,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@
+        uses: actions/checkout@v5
         with:
           fetch-depth: 0
 
@@ -81,7 +81,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@
+        uses: actions/checkout@v5
         with:
          fetch-depth: 0
 
@@ -124,7 +124,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@
+        uses: actions/checkout@v5
 
       - name: Setup conda
        uses: conda-incubator/setup-miniconda@v3
@@ -145,5 +145,5 @@ jobs:
        shell: bash -l {0}
        run: |
          cd ${{ matrix.FRIEND }}
-          pytest -v
+          pytest -v -W ignore::pytest.PytestRemovedIn9Warning
          cd ..
--- fsspec-2025.10.0/.github/workflows/pypipublish.yaml
+++ fsspec-2025.12.0/.github/workflows/pypipublish.yaml
@@ -8,9 +8,9 @@ jobs:
   deploy:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@
+      - uses: actions/checkout@v5
       - name: Set up Python
-        uses: actions/setup-python@
+        uses: actions/setup-python@v6
        with:
          python-version: "3.x"
      - name: Install dependencies
--- fsspec-2025.10.0/PKG-INFO
+++ fsspec-2025.12.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fsspec
-Version: 2025.10.0
+Version: 2025.12.0
 Summary: File-system specification
 Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
 Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
@@ -12,12 +12,12 @@ Keywords: file
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
 Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
-Requires-Python: >=3.9
+Classifier: Programming Language :: Python :: 3.14
+Requires-Python: >=3.10
 Provides-Extra: abfs
 Requires-Dist: adlfs; extra == 'abfs'
 Provides-Extra: adl
@@ -197,7 +197,7 @@ CI runtime. For local use, pick a version suitable for you.
 
 ```bash
 # For a new environment (mamba / conda).
-mamba create -n fsspec -c conda-forge python=3.9 -y
+mamba create -n fsspec -c conda-forge python=3.10 -y
 conda activate fsspec
 
 # Standard dev install with docs and tests.
--- fsspec-2025.10.0/README.md
+++ fsspec-2025.12.0/README.md
@@ -47,7 +47,7 @@ CI runtime. For local use, pick a version suitable for you.
 
 ```bash
 # For a new environment (mamba / conda).
-mamba create -n fsspec -c conda-forge python=3.9 -y
+mamba create -n fsspec -c conda-forge python=3.10 -y
 conda activate fsspec
 
 # Standard dev install with docs and tests.
--- fsspec-2025.10.0/docs/source/changelog.rst
+++ fsspec-2025.12.0/docs/source/changelog.rst
@@ -1,6 +1,26 @@
 Changelog
 =========
 
+2025.12.0
+---------
+
+Enhancements
+
+- fsspec.parquet to support filters and multiple files (#1945)
+
+Fixes
+
+- passing withdirs in aync _glob() (#1953)
+- fix _rm_file/_rm redirection in async (#1951)
+- allow arrowFile to be seekable (#1950)
+- add size attribute to arrowFile (#1944)
+
+
+Other
+
+- support py3.14 and drop 3.9 (#1946)
+- avoid ruff warning (#1942)
+
 2025.10.0
 ---------
 
--- fsspec-2025.10.0/fsspec/_version.py
+++ fsspec-2025.12.0/fsspec/_version.py
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '2025.10.0'
-__version_tuple__ = version_tuple = (2025, 10, 0)
+__version__ = version = '2025.12.0'
+__version_tuple__ = version_tuple = (2025, 12, 0)
 
 __commit_id__ = commit_id = None
--- fsspec-2025.10.0/fsspec/asyn.py
+++ fsspec-2025.12.0/fsspec/asyn.py
@@ -328,6 +328,11 @@ class AsyncFileSystem(AbstractFileSystem):
         return self._loop
 
     async def _rm_file(self, path, **kwargs):
+        if (
+            inspect.iscoroutinefunction(self._rm)
+            and type(self)._rm is not AsyncFileSystem._rm
+        ):
+            return await self._rm(path, recursive=False, batch_size=1, **kwargs)
         raise NotImplementedError
 
     async def _rm(self, path, recursive=False, batch_size=None, **kwargs):
@@ -776,6 +781,7 @@ class AsyncFileSystem(AbstractFileSystem):
         min_idx = min(idx_star, idx_qmark, idx_brace)
 
         detail = kwargs.pop("detail", False)
+        withdirs = kwargs.pop("withdirs", True)
 
         if not has_magic(path):
             if await self._exists(path, **kwargs):
@@ -805,7 +811,7 @@ class AsyncFileSystem(AbstractFileSystem):
             depth = None
 
         allpaths = await self._find(
-            root, maxdepth=depth, withdirs=True, detail=True, **kwargs
+            root, maxdepth=depth, withdirs=withdirs, detail=True, **kwargs
         )
 
         pattern = glob_translate(path + ("/" if ends_with_sep else ""))
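The net effect of the two `_glob` lines is that `withdirs` now travels from the caller down to `_find()`, where it used to be hard-coded to `True`. A minimal sketch, using `AsyncFileSystemWrapper` (also touched in this release) to route a plain memory filesystem through the async `_glob` path:

```python
import fsspec
from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper

# Wrap the synchronous in-memory filesystem so that the patched
# AsyncFileSystem._glob() is the code path actually exercised.
mem = fsspec.filesystem("memory")
mem.pipe("/data/a/part-0.txt", b"x")
mem.pipe("/data/b/part-1.txt", b"y")

afs = AsyncFileSystemWrapper(mem)
# withdirs=False drops directory entries such as /data/a from the results;
# withdirs=True reproduces the previous hard-coded behaviour.
print(afs.glob("/data/**", withdirs=False))
print(afs.glob("/data/**", withdirs=True))
```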
--- fsspec-2025.10.0/fsspec/caching.py
+++ fsspec-2025.12.0/fsspec/caching.py
@@ -6,20 +6,12 @@ import logging
 import math
 import os
 import threading
-import warnings
 from collections import OrderedDict
+from collections.abc import Callable
 from concurrent.futures import Future, ThreadPoolExecutor
 from itertools import groupby
 from operator import itemgetter
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    ClassVar,
-    Generic,
-    NamedTuple,
-    TypeVar,
-)
+from typing import TYPE_CHECKING, Any, ClassVar, Generic, NamedTuple, TypeVar
 
 if TYPE_CHECKING:
     import mmap
@@ -629,7 +621,7 @@ class KnownPartsOfAFile(BaseCache):
         fetcher: Fetcher,
         size: int,
         data: dict[tuple[int, int], bytes] | None = None,
-        strict: bool = True,
+        strict: bool = False,
         **_: Any,
     ):
         super().__init__(blocksize, fetcher, size)
@@ -653,50 +645,65 @@ class KnownPartsOfAFile(BaseCache):
         else:
             self.data = {}
 
+    @property
+    def size(self):
+        return sum(_[1] - _[0] for _ in self.data)
+
+    @size.setter
+    def size(self, value):
+        pass
+
+    @property
+    def nblocks(self):
+        return len(self.data)
+
+    @nblocks.setter
+    def nblocks(self, value):
+        pass
+
     def _fetch(self, start: int | None, stop: int | None) -> bytes:
         if start is None:
             start = 0
         if stop is None:
             stop = self.size
+        self.total_requested_bytes += stop - start
 
         out = b""
-        for (loc0, loc1), data in self.data.items():
-            # If self.strict=False, use zero-padded data
-            # for reads beyond the end of a "known" buffer
+        started = False
+        loc_old = 0
+        for loc0, loc1 in sorted(self.data):
+            if (loc0 <= start < loc1) and (loc0 <= stop <= loc1):
+                # entirely within the block
+                off = start - loc0
+                self.hit_count += 1
+                return self.data[(loc0, loc1)][off : off + stop - start]
+            if stop <= loc0:
+                break
+            if started and loc0 > loc_old:
+                # a gap where we need data
+                self.miss_count += 1
+                if self.strict:
+                    raise ValueError
+                out += b"\x00" * (loc0 - loc_old)
             if loc0 <= start < loc1:
+                # found the start
+                self.hit_count += 1
                 off = start - loc0
-                out = data[off : off + stop - start]
-                if not self.strict or loc1 >= stop:
-                    # The request is within a known range, or
-                    # it begins within a known range, and we
-                    # are allowed to pad reads beyond the
-                    # buffer with zero
-                    out += b"\x00" * (stop - loc1)
-                    self.hit_count += 1
-                    return out
-                else:
-                    # The request ends outside a known range,
-                    # and we are being "strict" about reads
-                    # beyond the buffer
-                    start = loc1
-                    break
-
-        # We only get here if there is a request outside the
-        # known parts of the file. In an ideal world, this
-        # should never happen
-        if self.fetcher is None:
-            # We cannot fetch the data, so raise an error
-            raise ValueError(f"Read is outside the known file parts: {(start, stop)}. ")
-        # We can fetch the data, but should warn the user
-        # that this may be slow
-        warnings.warn(
-            f"Read is outside the known file parts: {(start, stop)}. "
-            f"IO/caching performance may be poor!"
-        )
-        logger.debug(f"KnownPartsOfAFile cache fetching {start}-{stop}")
-        self.total_requested_bytes += stop - start
+                out = self.data[(loc0, loc1)][off : off + stop - start]
+                started = True
+            elif start < loc0 and stop > loc1:
+                # the whole block
+                self.hit_count += 1
+                out += self.data[(loc0, loc1)]
+            elif loc0 <= stop <= loc1:
+                # end block
+                self.hit_count += 1
+                return out + self.data[(loc0, loc1)][: stop - loc0]
+            loc_old = loc1
         self.miss_count += 1
-        return out + super()._fetch(start, stop)
+        if started and not self.strict:
+            return out + b"\x00" * (stop - loc_old)
+        raise ValueError
 
 
 class UpdatableLRU(Generic[P, T]):
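The rewritten `_fetch` walks the known blocks in sorted order, returning early for reads contained in a single block, concatenating whole blocks, and, with `strict=False` (now the default), zero-padding gaps instead of warning and delegating to the fetcher. A small sketch of the cache in isolation, calling the internal `_fetch` directly for illustration:

```python
from fsspec.caching import KnownPartsOfAFile

# Two known byte ranges of a nominal 30-byte file; with no fetcher, every
# read must be served from (or zero-padded around) these parts.
parts = {(0, 10): b"A" * 10, (20, 30): b"B" * 10}
cache = KnownPartsOfAFile(blocksize=0, fetcher=None, size=30, data=parts)

print(cache._fetch(2, 8))   # b"AAAAAA": entirely inside the first block
print(cache._fetch(5, 25))  # b"AAAAA" + 10 zero bytes + b"BBBBB": gap padded

strict = KnownPartsOfAFile(0, None, 30, data=parts, strict=True)
try:
    strict._fetch(5, 25)    # this read spans an unknown gap
except ValueError:
    print("strict=True raises instead of padding")
```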
--- fsspec-2025.10.0/fsspec/core.py
+++ fsspec-2025.12.0/fsspec/core.py
@@ -18,7 +18,7 @@ from fsspec.caching import (  # noqa: F401
 )
 from fsspec.compression import compr
 from fsspec.config import conf
-from fsspec.registry import filesystem, get_filesystem_class
+from fsspec.registry import available_protocols, filesystem, get_filesystem_class
 from fsspec.utils import (
     _unstrip_protocol,
     build_name_function,
@@ -334,34 +334,51 @@ def _un_chain(path, kwargs):
 
     if "::" in path:
         x = re.compile(".*[^a-z]+.*")  # test for non protocol-like single word
+        known_protocols = set(available_protocols())
         bits = []
+
+        # split on '::', then ensure each bit has a protocol
         for p in path.split("::"):
-            if "://" in p or x.match(p):
+            if p in known_protocols:
+                bits.append(p + "://")
+            elif "://" in p or x.match(p):
                 bits.append(p)
             else:
                 bits.append(p + "://")
     else:
         bits = [path]
+
     # [[url, protocol, kwargs], ...]
     out = []
     previous_bit = None
     kwargs = kwargs.copy()
+
     for bit in reversed(bits):
         protocol = kwargs.pop("protocol", None) or split_protocol(bit)[0] or "file"
         cls = get_filesystem_class(protocol)
         extra_kwargs = cls._get_kwargs_from_urls(bit)
         kws = kwargs.pop(protocol, {})
+
         if bit is bits[0]:
             kws.update(kwargs)
+
         kw = dict(
             **{k: v for k, v in extra_kwargs.items() if k not in kws or v != kws[k]},
             **kws,
         )
         bit = cls._strip_protocol(bit)
-        if "target_protocol" not in kw and issubclass(cls, ChainedFileSystem):
+
+        if (
+            "target_protocol" not in kw
+            and issubclass(cls, ChainedFileSystem)
+            and not bit
+        ):
+            # replace bit if we are chaining and no path given
             bit = previous_bit
+
         out.append((bit, protocol, kw))
         previous_bit = bit
+
     out.reverse()
     return out
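`_un_chain` is internal, but it shows the parsing directly: a bare segment that matches a registered protocol is now normalised to `protocol://` up front, and the `ChainedFileSystem` check lets a chaining layer with no path of its own inherit the path of the layer it wraps. A sketch, assuming `SimpleCacheFileSystem` counts as a `ChainedFileSystem` in this release:

```python
from fsspec.core import _un_chain

# "simplecache" is a bare registered protocol name, so it becomes
# "simplecache://"; its empty path is then replaced by the wrapped
# layer's path ("/afile").
for path, protocol, kw in _un_chain("simplecache::memory://afile", {}):
    print(protocol, repr(path), kw)
# simplecache '/afile' {}
# memory '/afile' {}
```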
--- fsspec-2025.10.0/fsspec/implementations/arrow.py
+++ fsspec-2025.12.0/fsspec/implementations/arrow.py
@@ -205,11 +205,11 @@ class ArrowFSWrapper(AbstractFileSystem):
         return self.fs.get_file_info(path).mtime
 
     def cat_file(self, path, start=None, end=None, **kwargs):
-        kwargs["seekable"] = start not in [None, 0]
+        kwargs.setdefault("seekable", start not in [None, 0])
         return super().cat_file(path, start=None, end=None, **kwargs)
 
     def get_file(self, rpath, lpath, **kwargs):
-        kwargs["seekable"] = False
+        kwargs.setdefault("seekable", False)
         super().get_file(rpath, lpath, **kwargs)
 
 
@@ -223,7 +223,6 @@ class ArrowFSWrapper(AbstractFileSystem):
         "readable",
         "writable",
         "close",
-        "size",
         "seekable",
     ],
 )
@@ -241,6 +240,10 @@ class ArrowFile(io.IOBase):
     def __enter__(self):
         return self
 
+    @property
+    def size(self):
+        return self.stream.size()
+
     def __exit__(self, *args):
         return self.close()
 
--- fsspec-2025.10.0/fsspec/implementations/asyn_wrapper.py
+++ fsspec-2025.12.0/fsspec/implementations/asyn_wrapper.py
@@ -5,6 +5,8 @@ import inspect
 import fsspec
 from fsspec.asyn import AsyncFileSystem, running_async
 
+from .chained import ChainedFileSystem
+
 
 def async_wrapper(func, obj=None, semaphore=None):
     """
@@ -35,7 +37,7 @@ def async_wrapper(func, obj=None, semaphore=None):
     return wrapper
 
 
-class AsyncFileSystemWrapper(AsyncFileSystem):
+class AsyncFileSystemWrapper(AsyncFileSystem, ChainedFileSystem):
     """
     A wrapper class to convert a synchronous filesystem into an asynchronous one.
 
--- fsspec-2025.10.0/fsspec/implementations/cache_metadata.py
+++ fsspec-2025.12.0/fsspec/implementations/cache_metadata.py
@@ -15,9 +15,7 @@ except ImportError:
 
 if TYPE_CHECKING:
     from collections.abc import Iterator
-    from typing import Any, Literal
-
-    from typing_extensions import TypeAlias
+    from typing import Any, Literal, TypeAlias
 
     from .cached import CachingFileSystem
 
--- fsspec-2025.10.0/fsspec/implementations/cached.py
+++ fsspec-2025.12.0/fsspec/implementations/cached.py
@@ -6,8 +6,9 @@ import os
 import tempfile
 import time
 import weakref
+from collections.abc import Callable
 from shutil import rmtree
-from typing import TYPE_CHECKING, Any, Callable, ClassVar
+from typing import TYPE_CHECKING, Any, ClassVar
 
 from fsspec import filesystem
 from fsspec.callbacks import DEFAULT_CALLBACK
--- fsspec-2025.10.0/fsspec/implementations/data.py
+++ fsspec-2025.12.0/fsspec/implementations/data.py
@@ -1,6 +1,5 @@
 import base64
 import io
-from typing import Optional
 from urllib.parse import unquote
 
 from fsspec import AbstractFileSystem
@@ -50,7 +49,7 @@ class DataFileSystem(AbstractFileSystem):
         return io.BytesIO(self.cat_file(path))
 
     @staticmethod
-    def encode(data: bytes, mime: Optional[str] = None):
+    def encode(data: bytes, mime: str | None = None):
        """Format the given data into data-URL syntax
 
        This version always base64 encodes, even when the data is ascii/url-safe.
--- fsspec-2025.10.0/fsspec/implementations/dirfs.py
+++ fsspec-2025.12.0/fsspec/implementations/dirfs.py
@@ -1,8 +1,9 @@
 from .. import filesystem
 from ..asyn import AsyncFileSystem
+from .chained import ChainedFileSystem
 
 
-class DirFileSystem(AsyncFileSystem):
+class DirFileSystem(AsyncFileSystem, ChainedFileSystem):
     """Directory prefix filesystem
 
     The DirFileSystem is a filesystem-wrapper. It assumes every path it is dealing with
--- fsspec-2025.10.0/fsspec/implementations/http.py
+++ fsspec-2025.12.0/fsspec/implementations/http.py
@@ -327,7 +327,7 @@ class HTTPFileSystem(AsyncFileSystem):
         async with meth(self.encode_url(rpath), data=gen_chunks(), **kw) as resp:
             self._raise_not_found_for_status(resp, rpath)
 
-    async def _exists(self, path, **kwargs):
+    async def _exists(self, path, strict=False, **kwargs):
         kw = self.kwargs.copy()
         kw.update(kwargs)
         try:
@@ -335,8 +335,14 @@ class HTTPFileSystem(AsyncFileSystem):
             session = await self.set_session()
             r = await session.get(self.encode_url(path), **kw)
             async with r:
+                if strict:
+                    self._raise_not_found_for_status(r, path)
                 return r.status < 400
+        except FileNotFoundError:
+            return False
         except aiohttp.ClientError:
+            if strict:
+                raise
             return False
 
     async def _isfile(self, path, **kwargs):
--- fsspec-2025.10.0/fsspec/implementations/http_sync.py
+++ fsspec-2025.12.0/fsspec/implementations/http_sync.py
@@ -463,14 +463,20 @@ class HTTPFileSystem(AbstractFileSystem):
             end -= 1  # bytes range is inclusive
         return f"bytes={start}-{end}"
 
-    def exists(self, path, **kwargs):
+    def exists(self, path, strict=False, **kwargs):
         kw = self.kwargs.copy()
         kw.update(kwargs)
         try:
             logger.debug(path)
             r = self.session.get(self.encode_url(path), **kw)
+            if strict:
+                self._raise_not_found_for_status(r, path)
             return r.status_code < 400
+        except FileNotFoundError:
+            return False
         except Exception:
+            if strict:
+                raise
             return False
 
     def isfile(self, path, **kwargs):
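The async and sync HTTP filesystems now share the same contract: `exists` defaults to the old swallow-everything boolean, while `strict=True` turns a 404 into `False` (via the internal `FileNotFoundError`) but re-raises transport failures. A hedged sketch against a hypothetical URL:

```python
import fsspec

fs = fsspec.filesystem("http")
url = "https://example.com/some/file"  # hypothetical URL

# Default behaviour: any failure, including DNS errors or timeouts,
# collapses to False.
print(fs.exists(url))

# strict=True: a 404 still returns False, but connection-level errors
# (aiohttp.ClientError on the async side) now propagate.
try:
    print(fs.exists(url, strict=True))
except Exception as exc:
    print("existence check failed:", exc)
```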
--- fsspec-2025.10.0/fsspec/implementations/reference.py
+++ fsspec-2025.12.0/fsspec/implementations/reference.py
@@ -219,7 +219,7 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
         fs.pipe("/".join([root, ".zmetadata"]), json.dumps(met).encode())
         return LazyReferenceMapper(root, fs, **kwargs)
 
-    @lru_cache()
+    @lru_cache
     def listdir(self):
         """List top-level directories"""
         dirs = (p.rsplit("/", 1)[0] for p in self.zmetadata if not p.startswith(".z"))
--- fsspec-2025.10.0/fsspec/json.py
+++ fsspec-2025.12.0/fsspec/json.py
@@ -1,13 +1,8 @@
 import json
-from collections.abc import Mapping, Sequence
+from collections.abc import Callable, Mapping, Sequence
 from contextlib import suppress
 from pathlib import PurePath
-from typing import (
-    Any,
-    Callable,
-    ClassVar,
-    Optional,
-)
+from typing import Any, ClassVar
 
 from .registry import _import_class, get_filesystem_class
 from .spec import AbstractFileSystem
@@ -45,12 +40,12 @@ class FilesystemJSONDecoder(json.JSONDecoder):
     def __init__(
         self,
         *,
-        object_hook: Optional[Callable[[dict[str, Any]], Any]] = None,
-        parse_float: Optional[Callable[[str], Any]] = None,
-        parse_int: Optional[Callable[[str], Any]] = None,
-        parse_constant: Optional[Callable[[str], Any]] = None,
+        object_hook: Callable[[dict[str, Any]], Any] | None = None,
+        parse_float: Callable[[str], Any] | None = None,
+        parse_int: Callable[[str], Any] | None = None,
+        parse_constant: Callable[[str], Any] | None = None,
         strict: bool = True,
-        object_pairs_hook: Optional[Callable[[list[tuple[str, Any]]], Any]] = None,
+        object_pairs_hook: Callable[[list[tuple[str, Any]]], Any] | None = None,
     ) -> None:
         self.original_object_hook = object_hook
 
--- fsspec-2025.10.0/fsspec/parquet.py
+++ fsspec-2025.12.0/fsspec/parquet.py
@@ -1,8 +1,12 @@
 import io
 import json
 import warnings
+from typing import Literal
+
+import fsspec
 
 from .core import url_to_fs
+from .spec import AbstractBufferedFile
 from .utils import merge_offset_ranges
 
 # Parquet-Specific Utilities for fsspec
@@ -14,19 +18,24 @@ from .utils import merge_offset_ranges
 # on remote file systems.
 
 
-def open_parquet_file(
-    path,
-    mode="rb",
-    fs=None,
+class AlreadyBufferedFile(AbstractBufferedFile):
+    def _fetch_range(self, start, end):
+        raise NotImplementedError
+
+
+def open_parquet_files(
+    path: list[str],
+    mode: Literal["rb"] = "rb",
+    fs: None | fsspec.AbstractFileSystem = None,
     metadata=None,
-    columns=None,
-    row_groups=None,
-    storage_options=None,
-    strict=False,
-    engine="auto",
-    max_gap=64_000,
-    max_block=256_000_000,
-    footer_sample_size=1_000_000,
+    columns: None | list[str] = None,
+    row_groups: None | list[int] = None,
+    storage_options: None | dict = None,
+    engine: str = "auto",
+    max_gap: int = 64_000,
+    max_block: int = 256_000_000,
+    footer_sample_size: int = 1_000_000,
+    filters: None | list[list[list[str]]] = None,
     **kwargs,
 ):
     """
@@ -72,12 +81,6 @@ def open_parquet_file(
     storage_options : dict, optional
         Used to generate an `AbstractFileSystem` object if `fs` was
         not specified.
-    strict : bool, optional
-        Whether the resulting `KnownPartsOfAFile` cache should
-        fetch reads that go beyond a known byte-range boundary.
-        If `False` (the default), any read that ends outside a
-        known part will be zero padded. Note that using
-        `strict=True` may be useful for debugging.
     max_gap : int, optional
         Neighboring byte ranges will only be merged when their
         inter-range gap is <= `max_gap`. Default is 64KB.
@@ -89,6 +92,10 @@ def open_parquet_file(
         for the footer metadata. If the sampled bytes do not contain
         the footer, a second read request will be required, and
         performance will suffer. Default is 1MB.
+    filters : list[list], optional
+        List of filters to apply to prevent reading row groups, of the
+        same format as accepted by the loading engines. Ignored if
+        ``row_groups`` is specified.
     **kwargs :
         Optional key-word arguments to pass to `fs.open`
     """
@@ -96,20 +103,36 @@ def open_parquet_file(
     # Make sure we have an `AbstractFileSystem` object
     # to work with
     if fs is None:
-        fs, path = url_to_fs(path, **(storage_options or {}))
+        path0 = path
+        if isinstance(path, (list, tuple)):
+            path = path[0]
+        fs, path = url_to_fs(path, **(storage_options or {}))
+    else:
+        path0 = path
 
-    # For now, `columns == []` not supported
-    # (Default to `columns=None`)
+    # For now, `columns == []` not supported, is the same
+    # as all columns
     if columns is not None and len(columns) == 0:
-        columns = None
+        columns = None
 
     # Set the engine
     engine = _set_engine(engine)
 
-    # Fetch the known byte ranges needed to read
-    # `columns` and/or `row_groups`
+    if isinstance(path0, (list, tuple)):
+        paths = path0
+    elif "*" in path:
+        paths = fs.glob(path)
+    elif path0.endswith("/"):  # or fs.isdir(path):
+        paths = [
+            _
+            for _ in fs.find(path, withdirs=False, detail=False)
+            if _.endswith((".parquet", ".parq"))
+        ]
+    else:
+        paths = [path]
+
     data = _get_parquet_byte_ranges(
-        [path],
+        paths,
         fs,
         metadata=metadata,
         columns=columns,
@@ -118,24 +141,37 @@ def open_parquet_file(
         max_gap=max_gap,
         max_block=max_block,
         footer_sample_size=footer_sample_size,
+        filters=filters,
     )
 
-    # Extract file name from `data`
-    fn = next(iter(data)) if data else path
-
     # Call self.open with "parts" caching
     options = kwargs.pop("cache_options", {}).copy()
-    return fs.open(
-        fn,
-        mode=mode,
-        cache_type="parts",
-        cache_options={
-            **options,
-            "data": data.get(fn, {}),
-            "strict": strict,
-        },
-        **kwargs,
-    )
+    return [
+        AlreadyBufferedFile(
+            fs=None,
+            path=fn,
+            mode=mode,
+            cache_type="parts",
+            cache_options={
+                **options,
+                "data": data.get(fn, {}),
+            },
+            size=max(_[1] for _ in data.get(fn, {})),
+            **kwargs,
+        )
+        for fn in data
+    ]
+
+
+def open_parquet_file(*args, **kwargs):
+    """Create files tailed to reading specific parts of parquet files
+
+    Please see ``open_parquet_files`` for details of the arguments. The
+    difference is, this function always returns a single ``AleadyBufferedFile``,
+    whereas `open_parquet_files`` always returns a list of files, even if
+    there are one or zero matching parquet files.
+    """
+    return open_parquet_files(*args, **kwargs)[0]
 
 
 def _get_parquet_byte_ranges(
@@ -148,6 +184,7 @@ def _get_parquet_byte_ranges(
     max_block=256_000_000,
     footer_sample_size=1_000_000,
     engine="auto",
+    filters=None,
 ):
     """Get a dictionary of the known byte ranges needed
     to read a specific column/row-group selection from a
@@ -172,6 +209,7 @@ def _get_parquet_byte_ranges(
             row_groups=row_groups,
             max_gap=max_gap,
             max_block=max_block,
+            filters=filters,
         )
 
     # Get file sizes asynchronously
@@ -183,17 +221,16 @@ def _get_parquet_byte_ranges(
     data_starts = []
     data_ends = []
     add_header_magic = True
-    if columns is None and row_groups is None:
+    if columns is None and row_groups is None and filters is None:
         # We are NOT selecting specific columns or row-groups.
         #
         # We can avoid sampling the footers, and just transfer
         # all file data with cat_ranges
         for i, path in enumerate(paths):
             result[path] = {}
-            for b in range(0, file_sizes[i], max_block):
-                data_paths.append(path)
-                data_starts.append(b)
-                data_ends.append(min(b + max_block, file_sizes[i]))
+            data_paths.append(path)
+            data_starts.append(0)
+            data_ends.append(file_sizes[i])
         add_header_magic = False  # "Magic" should already be included
     else:
         # We ARE selecting specific columns or row-groups.
@@ -235,29 +272,21 @@ def _get_parquet_byte_ranges(
 
     # Calculate required byte ranges for each path
     for i, path in enumerate(paths):
-        # Deal with small-file case.
-        # Just include all remaining bytes of the file
-        # in a single range.
-        if file_sizes[i] < max_block:
-            if footer_starts[i] > 0:
-                # Only need to transfer the data if the
-                # footer sample isn't already the whole file
-                data_paths.append(path)
-                data_starts.append(0)
-                data_ends.append(footer_starts[i])
-            continue
-
         # Use "engine" to collect data byte ranges
         path_data_starts, path_data_ends = engine._parquet_byte_ranges(
             columns,
             row_groups=row_groups,
            footer=footer_samples[i],
            footer_start=footer_starts[i],
+            filters=filters,
        )
 
        data_paths += [path] * len(path_data_starts)
        data_starts += path_data_starts
        data_ends += path_data_ends
+        result.setdefault(path, {})[(footer_starts[i], file_sizes[i])] = (
+            footer_samples[i]
+        )
 
     # Merge adjacent offset ranges
     data_paths, data_starts, data_ends = merge_offset_ranges(
@@ -291,6 +320,7 @@ def _get_parquet_byte_ranges_from_metadata(
     row_groups=None,
     max_gap=64_000,
     max_block=256_000_000,
+    filters=None,
 ):
     """Simplified version of `_get_parquet_byte_ranges` for
     the case that an engine-specific `metadata` object is
@@ -300,9 +330,7 @@ def _get_parquet_byte_ranges_from_metadata(
 
     # Use "engine" to collect data byte ranges
     data_paths, data_starts, data_ends = engine._parquet_byte_ranges(
-        columns,
-        row_groups=row_groups,
-        metadata=metadata,
+        columns, row_groups=row_groups, metadata=metadata, filters=filters
     )
 
     # Merge adjacent offset ranges
@@ -401,16 +429,19 @@ class FastparquetEngine:
         metadata=None,
         footer=None,
         footer_start=None,
+        filters=None,
     ):
         # Initialize offset ranges and define ParqetFile metadata
         pf = metadata
         data_paths, data_starts, data_ends = [], [], []
+        if filters and row_groups:
+            raise ValueError("filters and row_groups cannot be used together")
         if pf is None:
             pf = self.fp.ParquetFile(io.BytesIO(footer))
 
         # Convert columns to a set and add any index columns
         # specified in the pandas metadata (just in case)
-        column_set = None if columns is None else set(columns)
+        column_set = None if columns is None else {c.split(".", 1)[0] for c in columns}
         if column_set is not None and hasattr(pf, "pandas_metadata"):
             md_index = [
                 ind
@@ -422,7 +453,12 @@ class FastparquetEngine:
 
         # Check if row_groups is a list of integers
         # or a list of row-group metadata
-        if row_groups and not isinstance(row_groups[0], int):
+        if filters:
+            from fastparquet.api import filter_row_groups
+
+            row_group_indices = None
+            row_groups = filter_row_groups(pf, filters)
+        elif row_groups and not isinstance(row_groups[0], int):
             # Input row_groups contains row-group metadata
             row_group_indices = None
         else:
@@ -486,9 +522,12 @@ class PyarrowEngine:
         metadata=None,
         footer=None,
         footer_start=None,
+        filters=None,
     ):
         if metadata is not None:
             raise ValueError("metadata input not supported for PyarrowEngine")
+        if filters:
+            raise NotImplementedError
 
         data_starts, data_ends = [], []
         md = self.pq.ParquetFile(io.BytesIO(footer)).metadata
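Taken together, the parquet changes replace the single-path helper with `open_parquet_files`, which expands a list, glob, or directory prefix into `AlreadyBufferedFile` objects whose "parts" caches already hold the byte ranges implied by `columns`, `row_groups`, or the new `filters`. A hedged sketch; the bucket, column, and filter values are hypothetical, and per the engine code above `filters` currently works only with fastparquet:

```python
from fsspec.parquet import open_parquet_file, open_parquet_files

# A trailing "/" makes the helper fs.find() all *.parquet / *.parq files
# under the prefix; a list of paths or a "*" glob is handled the same way.
files = open_parquet_files(
    "s3://bucket/dataset/",
    columns=["value"],                   # only fetch this column's byte ranges
    filters=[[("year", "==", "2025")]],  # row-group pruning, fastparquet only
    engine="fastparquet",
    storage_options={"anon": True},
)
for f in files:
    # AlreadyBufferedFile serves reads from its cached parts, so a parquet
    # engine can consume it without extra remote requests.
    print(f.path, f.size)

# The old entry point remains as a thin wrapper returning files[0]:
single = open_parquet_file("s3://bucket/dataset/part-0.parquet")
```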
--- fsspec-2025.10.0/fsspec/registry.py
+++ fsspec-2025.12.0/fsspec/registry.py
@@ -72,6 +72,9 @@ known_implementations = {
         "class": "fsspec.implementations.arrow.HadoopFileSystem",
         "err": "pyarrow and local java libraries required for HDFS",
     },
+    "async_wrapper": {
+        "class": "fsspec.implementations.asyn_wrapper.AsyncFileSystemWrapper",
+    },
     "asynclocal": {
         "class": "morefs.asyn_local.AsyncLocalFileSystem",
         "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
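Registering the wrapper under a protocol name means it can be constructed through the ordinary registry machinery rather than by importing the class. A minimal sketch, assuming the wrapped filesystem is passed as the `fs` keyword:

```python
import fsspec
from fsspec.implementations.local import LocalFileSystem

# "async_wrapper" now resolves via known_implementations.
afs = fsspec.filesystem("async_wrapper", fs=LocalFileSystem())

print(type(afs).__name__)  # AsyncFileSystemWrapper
print(afs.exists("/"))     # sync facade over the wrapped local filesystem
```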
--- fsspec-2025.10.0/fsspec/utils.py
+++ fsspec-2025.12.0/fsspec/utils.py
@@ -7,23 +7,16 @@ import os
 import re
 import sys
 import tempfile
-from collections.abc import Iterable, Iterator, Sequence
+from collections.abc import Callable, Iterable, Iterator, Sequence
 from functools import partial
 from hashlib import md5
 from importlib.metadata import version
-from typing import (
-    IO,
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    TypeVar,
-)
+from typing import IO, TYPE_CHECKING, Any, TypeVar
 from urllib.parse import urlsplit
 
 if TYPE_CHECKING:
     import pathlib
-
-    from typing_extensions import TypeGuard
+    from typing import TypeGuard
 
     from fsspec.spec import AbstractFileSystem
 
--- fsspec-2025.10.0/pyproject.toml
+++ fsspec-2025.12.0/pyproject.toml
@@ -9,18 +9,18 @@ description = "File-system specification"
 readme = "README.md"
 license = "BSD-3-Clause"
 license-files = ["LICENSE"]
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 maintainers = [{ name = "Martin Durant", email = "mdurant@anaconda.com" }]
 keywords = ["file"]
 classifiers = [
     "Development Status :: 4 - Beta",
     "Intended Audience :: Developers",
     "Operating System :: OS Independent",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
 ]
 
 [project.optional-dependencies]
@@ -194,6 +194,8 @@ ignore = [
     "B026",
     # No explicit `stacklevel` keyword argument found
     "B028",
+    # `zip` without explicit `strict` keyword
+    "B905",
     # Assigning lambda expression
     "E731",
     # Ambiguous variable names
@@ -220,8 +222,6 @@ ignore = [
     "SIM115",
     "SIM117",
     "TC003",
-    # https://github.com/astral-sh/ruff/issues/7871
-    "UP038",
     # https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules
     "W191",
     "E111",