fsspec 2024.10.0__tar.gz → 2025.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fsspec-2024.10.0 → fsspec-2025.2.0}/.github/workflows/main.yaml +16 -8
- {fsspec-2024.10.0 → fsspec-2025.2.0}/.pre-commit-config.yaml +6 -2
- {fsspec-2024.10.0 → fsspec-2025.2.0}/PKG-INFO +3 -3
- {fsspec-2024.10.0 → fsspec-2025.2.0}/ci/environment-friends.yml +4 -2
- {fsspec-2024.10.0 → fsspec-2025.2.0}/docs/source/api.rst +44 -12
- {fsspec-2024.10.0 → fsspec-2025.2.0}/docs/source/async.rst +34 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/docs/source/changelog.rst +44 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/docs/source/features.rst +18 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/_version.py +2 -2
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/archive.py +3 -1
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/asyn.py +5 -7
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/caching.py +34 -19
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/core.py +15 -13
- fsspec-2025.2.0/fsspec/implementations/asyn_wrapper.py +99 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/cached.py +1 -1
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/dbfs.py +3 -3
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/ftp.py +1 -1
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/http.py +4 -22
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/local.py +6 -1
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/memory.py +8 -3
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/reference.py +124 -17
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/webhdfs.py +2 -1
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/mapping.py +1 -1
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/parquet.py +1 -1
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/registry.py +7 -3
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/spec.py +209 -33
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/tests/abstract/__init__.py +3 -1
- fsspec-2025.2.0/fsspec/tests/abstract/open.py +11 -0
- fsspec-2025.2.0/fsspec/tests/abstract/pipe.py +11 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/utils.py +4 -2
- {fsspec-2024.10.0 → fsspec-2025.2.0}/pyproject.toml +15 -3
- fsspec-2024.10.0/.github/workflows/codespell.yml +0 -19
- {fsspec-2024.10.0 → fsspec-2025.2.0}/.codespellrc +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/.coveragerc +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/.gitattributes +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/.github/workflows/pypipublish.yaml +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/.gitignore +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/LICENSE +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/README.md +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/ci/environment-downstream.yml +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/ci/environment-py38.yml +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/ci/environment-typecheck.yml +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/ci/environment-win.yml +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/docs/Makefile +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/docs/README.md +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/docs/environment.yml +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/docs/make.bat +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/docs/source/_static/custom.css +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/docs/source/conf.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/docs/source/copying.rst +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/docs/source/developer.rst +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/docs/source/img/gui.png +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/docs/source/index.rst +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/docs/source/intro.rst +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/docs/source/usage.rst +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/__init__.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/callbacks.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/compression.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/config.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/conftest.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/dircache.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/exceptions.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/fuse.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/generic.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/gui.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/__init__.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/arrow.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/cache_mapper.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/cache_metadata.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/dask.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/data.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/dirfs.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/git.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/github.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/jupyter.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/libarchive.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/sftp.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/smb.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/tar.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/implementations/zip.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/json.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/tests/abstract/common.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/tests/abstract/copy.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/tests/abstract/get.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/tests/abstract/mv.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/tests/abstract/put.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/fsspec/transaction.py +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/install_s3fs.sh +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/readthedocs.yml +0 -0
- {fsspec-2024.10.0 → fsspec-2025.2.0}/setup.cfg +0 -0
|
@@ -9,11 +9,16 @@ on:
|
|
|
9
9
|
jobs:
|
|
10
10
|
linux:
|
|
11
11
|
name: ${{ matrix.PY }}-pytest
|
|
12
|
-
runs-on: ubuntu-
|
|
12
|
+
runs-on: ubuntu-24.04
|
|
13
13
|
strategy:
|
|
14
14
|
fail-fast: false
|
|
15
15
|
matrix:
|
|
16
|
-
PY:
|
|
16
|
+
PY:
|
|
17
|
+
- "3.9"
|
|
18
|
+
- "3.10"
|
|
19
|
+
- "3.11"
|
|
20
|
+
- "3.12"
|
|
21
|
+
- "3.13"
|
|
17
22
|
|
|
18
23
|
env:
|
|
19
24
|
CIRUN: true
|
|
@@ -62,7 +67,7 @@ jobs:
|
|
|
62
67
|
|
|
63
68
|
lint:
|
|
64
69
|
name: lint
|
|
65
|
-
runs-on: ubuntu-
|
|
70
|
+
runs-on: ubuntu-24.04
|
|
66
71
|
steps:
|
|
67
72
|
- uses: actions/checkout@main
|
|
68
73
|
- uses: actions/setup-python@main
|
|
@@ -88,7 +93,7 @@ jobs:
|
|
|
88
93
|
#
|
|
89
94
|
downstream:
|
|
90
95
|
name: downstream
|
|
91
|
-
runs-on: ubuntu-
|
|
96
|
+
runs-on: ubuntu-24.04
|
|
92
97
|
|
|
93
98
|
steps:
|
|
94
99
|
- name: Checkout
|
|
@@ -121,11 +126,11 @@ jobs:
|
|
|
121
126
|
|
|
122
127
|
fsspec_friends:
|
|
123
128
|
name: ${{ matrix.FRIEND }}-pytest
|
|
124
|
-
runs-on: ubuntu-
|
|
129
|
+
runs-on: ubuntu-24.04
|
|
125
130
|
strategy:
|
|
126
131
|
fail-fast: false
|
|
127
132
|
matrix:
|
|
128
|
-
FRIEND: [
|
|
133
|
+
FRIEND: [s3fs, gcsfs]
|
|
129
134
|
|
|
130
135
|
env:
|
|
131
136
|
CIRUN: true
|
|
@@ -150,8 +155,11 @@ jobs:
|
|
|
150
155
|
shell: bash -l {0}
|
|
151
156
|
run: |
|
|
152
157
|
pip install -e . --no-deps
|
|
153
|
-
pip
|
|
158
|
+
pip list
|
|
154
159
|
|
|
155
160
|
- name: Test
|
|
156
161
|
shell: bash -l {0}
|
|
157
|
-
run:
|
|
162
|
+
run: |
|
|
163
|
+
cd ${{ matrix.FRIEND }}
|
|
164
|
+
pytest -v
|
|
165
|
+
cd ..
|
|
@@ -5,7 +5,7 @@ exclude: >
|
|
|
5
5
|
repos:
|
|
6
6
|
|
|
7
7
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
8
|
-
rev:
|
|
8
|
+
rev: v5.0.0
|
|
9
9
|
hooks:
|
|
10
10
|
- id: trailing-whitespace
|
|
11
11
|
- id: end-of-file-fixer
|
|
@@ -14,10 +14,14 @@ repos:
|
|
|
14
14
|
- id: check-yaml
|
|
15
15
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
16
16
|
# Ruff version.
|
|
17
|
-
rev: v0.
|
|
17
|
+
rev: v0.9.2
|
|
18
18
|
hooks:
|
|
19
19
|
# Run the linter.
|
|
20
20
|
- id: ruff
|
|
21
21
|
args: [ --fix, "--show-fixes"]
|
|
22
22
|
- id: ruff-format
|
|
23
23
|
types_or: [python]
|
|
24
|
+
- repo: https://github.com/codespell-project/codespell
|
|
25
|
+
rev: v2.4.0
|
|
26
|
+
hooks:
|
|
27
|
+
- id: codespell
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: fsspec
|
|
3
|
-
Version:
|
|
3
|
+
Version: 2025.2.0
|
|
4
4
|
Summary: File-system specification
|
|
5
5
|
Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
|
|
6
6
|
Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
|
|
@@ -46,6 +46,7 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
46
46
|
Classifier: Programming Language :: Python :: 3.10
|
|
47
47
|
Classifier: Programming Language :: Python :: 3.11
|
|
48
48
|
Classifier: Programming Language :: Python :: 3.12
|
|
49
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
49
50
|
Requires-Python: >=3.8
|
|
50
51
|
Provides-Extra: abfs
|
|
51
52
|
Requires-Dist: adlfs; extra == 'abfs'
|
|
@@ -130,7 +131,6 @@ Requires-Dist: pytest-rerunfailures; extra == 'test'
|
|
|
130
131
|
Requires-Dist: requests; extra == 'test'
|
|
131
132
|
Provides-Extra: test-downstream
|
|
132
133
|
Requires-Dist: aiobotocore<3.0.0,>=2.5.4; extra == 'test-downstream'
|
|
133
|
-
Requires-Dist: dask-expr; extra == 'test-downstream'
|
|
134
134
|
Requires-Dist: dask[dataframe,test]; extra == 'test-downstream'
|
|
135
135
|
Requires-Dist: moto[server]<5,>4; extra == 'test-downstream'
|
|
136
136
|
Requires-Dist: pytest-timeout; extra == 'test-downstream'
|
|
@@ -9,7 +9,6 @@ dependencies:
|
|
|
9
9
|
- pytest-cov
|
|
10
10
|
- pytest-mock
|
|
11
11
|
- pip
|
|
12
|
-
- pytest<8
|
|
13
12
|
- ujson
|
|
14
13
|
- requests
|
|
15
14
|
- decorator
|
|
@@ -25,5 +24,8 @@ dependencies:
|
|
|
25
24
|
- google-api-python-client
|
|
26
25
|
- httpretty
|
|
27
26
|
- aiobotocore
|
|
28
|
-
-
|
|
27
|
+
- moto
|
|
29
28
|
- flask
|
|
29
|
+
- pip:
|
|
30
|
+
- git+https://github.com/fsspec/s3fs
|
|
31
|
+
- git+https://github.com/fsspec/gcsfs
|
|
@@ -209,41 +209,73 @@ Built-in Implementations
|
|
|
209
209
|
Other Known Implementations
|
|
210
210
|
---------------------------
|
|
211
211
|
|
|
212
|
-
|
|
213
|
-
|
|
212
|
+
|
|
213
|
+
Note that most of these projects are hosted outside of the `fsspec` organisation. Please read their
|
|
214
|
+
documentation carefully before using any particular package.
|
|
215
|
+
|
|
216
|
+
- `abfs`_ for Azure Blob service, with protocol "abfs://"
|
|
217
|
+
- `adl`_ for Azure DataLake storage, with protocol "adl://"
|
|
214
218
|
- `alluxiofs`_ to access fsspec implemented filesystem with Alluxio distributed cache
|
|
215
|
-
- `boxfs`_ for access to Box file storage
|
|
216
|
-
- `
|
|
219
|
+
- `boxfs`_ for access to Box file storage, with protocol "box://"
|
|
220
|
+
- `csvbase`_ for access to csvbase.com hosted CSV files, with protocol "csvbase://"
|
|
221
|
+
- `dropbox`_ for access to dropbox shares, with protocol "dropbox://"
|
|
217
222
|
- `dvc`_ to access DVC/Git repository as a filesystem
|
|
218
|
-
- `
|
|
223
|
+
- `fsspec-encrypted`_ for transparent encryption on top of other fsspec filesystems.
|
|
224
|
+
- `gcsfs`_ for Google Cloud Storage, with protocol "gcs://"
|
|
219
225
|
- `gdrive`_ to access Google Drive and shares (experimental)
|
|
226
|
+
- `git`_ to access Git repositories
|
|
220
227
|
- `huggingface_hub`_ to access the Hugging Face Hub filesystem, with protocol "hf://"
|
|
221
|
-
- `
|
|
222
|
-
- `
|
|
228
|
+
- `hdfs-native`_ to access Hadoop filesystem, with protocol "hdfs://"
|
|
229
|
+
- `httpfs-sync`_ to access HTTP(s) files in a synchronous manner to offer an alternative to the aiohttp-based implementation.
|
|
230
|
+
- `ipfsspec`_ for the InterPlanetary File System (IPFS), with protocol "ipfs://"
|
|
231
|
+
- `irods`_ for access to iRODS servers, with protocol "irods://"
|
|
232
|
+
- `lakefs`_ for lakeFS data lakes, with protocol "lakefs://"
|
|
233
|
+
- `morefs`_ for `OverlayFileSystem`, `DictFileSystem`, and others
|
|
234
|
+
- `ocifs`_ for access to Oracle Cloud Object Storage, with protocol "oci://"
|
|
223
235
|
- `ocilake`_ for OCI Data Lake storage
|
|
224
236
|
- `ossfs`_ for Alibaba Cloud (Aliyun) Object Storage System (OSS)
|
|
225
237
|
- `p9fs`_ for 9P (Plan 9 Filesystem Protocol) servers
|
|
226
|
-
- `
|
|
238
|
+
- `PyAthena`_ for S3 access to Amazon Athena, with protocol "s3://" or "s3a://"
|
|
239
|
+
- `PyDrive2`_ for Google Drive access
|
|
240
|
+
- `s3fs`_ for Amazon S3 and other compatible stores, with protocol "s3://"
|
|
241
|
+
- `sshfs`_ for access to SSH servers, with protocol "ssh://" or "sftp://"
|
|
242
|
+
- `swiftspec`_ for OpenStack SWIFT, with protocol "swift://"
|
|
243
|
+
- `tosfs`_ for ByteDance volcano engine Tinder Object Storage (TOS)
|
|
227
244
|
- `wandbfs`_ to access Wandb run data (experimental)
|
|
228
|
-
- `
|
|
245
|
+
- `wandbfsspec`_ to access Weights & Biases (experimental)
|
|
246
|
+
- `webdav4`_ for WebDAV, with protocol "webdav://" or "dav://"
|
|
229
247
|
- `xrootd`_ for xrootd, with protocol "root://"
|
|
230
248
|
|
|
231
249
|
.. _abfs: https://github.com/dask/adlfs
|
|
232
250
|
.. _adl: https://github.com/dask/adlfs
|
|
233
251
|
.. _alluxiofs: https://github.com/fsspec/alluxiofs
|
|
234
252
|
.. _boxfs: https://github.com/IBM/boxfs
|
|
235
|
-
..
|
|
253
|
+
.. _csvbase: https://github.com/calpaterson/csvbase-client
|
|
254
|
+
.. _dropbox: https://github.com/fsspec/dropboxdrivefs
|
|
236
255
|
.. _dvc: https://github.com/iterative/dvc
|
|
256
|
+
.. _fsspec-encrypted: https://github.com/thevgergroup/fsspec-encrypted
|
|
237
257
|
.. _gcsfs: https://gcsfs.readthedocs.io/en/latest/
|
|
238
258
|
.. _gdrive: https://github.com/fsspec/gdrivefs
|
|
259
|
+
.. _git: https://github.com/iterative/scmrepo
|
|
260
|
+
.. _hdfs-native: https://github.com/Kimahriman/hdfs-native/blob/master/python/hdfs_native/fsspec.py
|
|
261
|
+
.. _httpfs-sync: https://github.com/moradology/httpfs-sync
|
|
239
262
|
.. _huggingface_hub: https://huggingface.co/docs/huggingface_hub/main/en/guides/hf_file_system
|
|
240
|
-
..
|
|
241
|
-
..
|
|
263
|
+
.. _ipfsspec: https://github.com/fsspec/ipfsspec
|
|
264
|
+
.. _irods: https://github.com/xwcl/irods_fsspec
|
|
265
|
+
.. _lakefs: https://github.com/aai-institute/lakefs-spec
|
|
266
|
+
.. _morefs: https://github.com/iterative/morefs
|
|
267
|
+
.. _ocifs: https://ocifs.readthedocs.io/en/latest/
|
|
242
268
|
.. _ocilake: https://github.com/oracle/ocifs
|
|
243
269
|
.. _ossfs: https://github.com/fsspec/ossfs
|
|
244
270
|
.. _p9fs: https://github.com/pbchekin/p9fs-py
|
|
271
|
+
.. _PyAthena: https://github.com/laughingman7743/PyAthena
|
|
272
|
+
.. _PyDrive2: https://github.com/iterative/PyDrive2
|
|
245
273
|
.. _s3fs: https://s3fs.readthedocs.io/en/latest/
|
|
274
|
+
.. _sshfs: https://github.com/fsspec/sshfs
|
|
275
|
+
.. _swiftspec: https://github.com/fsspec/swiftspec
|
|
276
|
+
.. _tosfs: https://tosfs.readthedocs.io/en/latest/
|
|
246
277
|
.. _wandbfs: https://github.com/jkulhanek/wandbfs
|
|
278
|
+
.. _wandbfsspec: https://github.com/alvarobartt/wandbfsspec
|
|
247
279
|
.. _webdav4: https://github.com/skshetry/webdav4
|
|
248
280
|
.. _xrootd: https://github.com/CoffeaTeam/fsspec-xrootd
|
|
249
281
|
|
|
@@ -152,3 +152,37 @@ available as the attribute ``.loop``.
|
|
|
152
152
|
|
|
153
153
|
<script data-goatcounter="https://fsspec.goatcounter.com/count"
|
|
154
154
|
async src="//gc.zgo.at/count.js"></script>
|
|
155
|
+
|
|
156
|
+
AsyncFileSystemWrapper
|
|
157
|
+
----------------------
|
|
158
|
+
|
|
159
|
+
The `AsyncFileSystemWrapper` class is an experimental feature that allows you to convert
|
|
160
|
+
a synchronous filesystem into an asynchronous one. This is useful for quickly integrating
|
|
161
|
+
synchronous filesystems into workflows that may expect `AsyncFileSystem` instances.
|
|
162
|
+
|
|
163
|
+
Basic Usage
|
|
164
|
+
~~~~~~~~~~~
|
|
165
|
+
|
|
166
|
+
To use `AsyncFileSystemWrapper`, wrap any synchronous filesystem to work in an asynchronous context.
|
|
167
|
+
In this example, the synchronous `LocalFileSystem` is wrapped, creating an `AsyncFileSystem` instance
|
|
168
|
+
backed by the normal, synchronous methods of `LocalFileSystem`:
|
|
169
|
+
|
|
170
|
+
.. code-block:: python
|
|
171
|
+
|
|
172
|
+
import asyncio
|
|
173
|
+
import fsspec
|
|
174
|
+
from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper
|
|
175
|
+
|
|
176
|
+
async def async_copy_file():
|
|
177
|
+
sync_fs = fsspec.filesystem('file') # by-default synchronous, local filesystem
|
|
178
|
+
async_fs = AsyncFileSystemWrapper(sync_fs)
|
|
179
|
+
return await async_fs._copy('/source/file.txt', '/destination/file.txt')
|
|
180
|
+
|
|
181
|
+
asyncio.run(async_copy_file())
|
|
182
|
+
|
|
183
|
+
Limitations
|
|
184
|
+
-----------
|
|
185
|
+
|
|
186
|
+
This is experimental. Users should not expect this wrapper to magically make things faster.
|
|
187
|
+
It is primarily provided to allow usage of synchronous filesystems with interfaces that expect
|
|
188
|
+
`AsyncFileSystem` instances.
|
|
@@ -1,6 +1,50 @@
|
|
|
1
1
|
Changelog
|
|
2
2
|
=========
|
|
3
3
|
|
|
4
|
+
2025.2.0
|
|
5
|
+
--------
|
|
6
|
+
|
|
7
|
+
Enhancements
|
|
8
|
+
|
|
9
|
+
- add open() to referenceFS (#1778)
|
|
10
|
+
|
|
11
|
+
Fixes
|
|
12
|
+
|
|
13
|
+
- don't make async open() in async-wrapper (#1769)
|
|
14
|
+
- fix CI following dask-expr upstream change (#1781)
|
|
15
|
+
- cope with zarr3 "Buffer" objects in referenceFS (#1784)
|
|
16
|
+
|
|
17
|
+
Other
|
|
18
|
+
|
|
19
|
+
- use itemgetter in archiveFS (#1764)
|
|
20
|
+
- document that newline is included in readline(s) (#1770)
|
|
21
|
+
- format/spelling (#1774, 1779, 1780)
|
|
22
|
+
|
|
23
|
+
2024.12.0
|
|
24
|
+
---------
|
|
25
|
+
|
|
26
|
+
Enhancements
|
|
27
|
+
|
|
28
|
+
- "exclusive" mode for writing (#1762, 1756, 174+)
|
|
29
|
+
- "tree" text display of filesystem contents (#1750)
|
|
30
|
+
- async wrapper for sync FSs (#1745)
|
|
31
|
+
- new known implementation: tosfs (#1739)
|
|
32
|
+
- consolidate block fetch requests (#1733)
|
|
33
|
+
|
|
34
|
+
Fixes
|
|
35
|
+
|
|
36
|
+
- better webHDFS proxies (#
|
|
37
|
+
- sync FSs in referenceFS (#1755)
|
|
38
|
+
- don't serialize file caches (#1753)
|
|
39
|
+
- race condition in local ls() (#1744)
|
|
40
|
+
- missing/nan references in parquet (#1738)
|
|
41
|
+
- _un_chain kwargs (#1736)
|
|
42
|
+
- async _cat_file in referenceFS (#1734)
|
|
43
|
+
|
|
44
|
+
Other
|
|
45
|
+
|
|
46
|
+
- fallback implementation for _fetch_range (#1732)
|
|
47
|
+
|
|
4
48
|
2024.10.0
|
|
5
49
|
---------
|
|
6
50
|
|
|
@@ -408,3 +408,21 @@ tqdm.
|
|
|
408
408
|
|
|
409
409
|
<script data-goatcounter="https://fsspec.goatcounter.com/count"
|
|
410
410
|
async src="//gc.zgo.at/count.js"></script>
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
Exclusive write
|
|
414
|
+
---------------
|
|
415
|
+
|
|
416
|
+
Some backends support writing to a file only if it doesn't already exist. This may be
|
|
417
|
+
implemented for the following methods:
|
|
418
|
+
- pipe_file (with argument ``mode=='create'``)
|
|
419
|
+
- put_file (with argument ``mode=='create'``)
|
|
420
|
+
- open (with argument ``mode="xb"``)
|
|
421
|
+
Since some writes will be achieved in blocks, the timing of when the check is done is
|
|
422
|
+
not defined - it may be at the start or at the completion of the operation, depending
|
|
423
|
+
on the backend.
|
|
424
|
+
|
|
425
|
+
If using exclusive mode on a file that does already exist, a ``FileExistsError`` will
|
|
426
|
+
be raised.
|
|
427
|
+
|
|
428
|
+
This feature is currently included on a trial basis and may change in the future.
|
|
@@ -12,5 +12,5 @@ __version__: str
|
|
|
12
12
|
__version_tuple__: VERSION_TUPLE
|
|
13
13
|
version_tuple: VERSION_TUPLE
|
|
14
14
|
|
|
15
|
-
__version__ = version = '
|
|
16
|
-
__version_tuple__ = version_tuple = (
|
|
15
|
+
__version__ = version = '2025.2.0'
|
|
16
|
+
__version_tuple__ = version_tuple = (2025, 2, 0)
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import operator
|
|
2
|
+
|
|
1
3
|
from fsspec import AbstractFileSystem
|
|
2
4
|
from fsspec.utils import tokenize
|
|
3
5
|
|
|
@@ -67,7 +69,7 @@ class AbstractArchiveFileSystem(AbstractFileSystem):
|
|
|
67
69
|
out = {"name": ppath, "size": 0, "type": "directory"}
|
|
68
70
|
paths[ppath] = out
|
|
69
71
|
if detail:
|
|
70
|
-
out = sorted(paths.values(), key=
|
|
72
|
+
out = sorted(paths.values(), key=operator.itemgetter("name"))
|
|
71
73
|
return out
|
|
72
74
|
else:
|
|
73
75
|
return sorted(paths)
|
|
@@ -408,7 +408,7 @@ class AsyncFileSystem(AbstractFileSystem):
|
|
|
408
408
|
continue
|
|
409
409
|
raise ex
|
|
410
410
|
|
|
411
|
-
async def _pipe_file(self, path, value, **kwargs):
|
|
411
|
+
async def _pipe_file(self, path, value, mode="overwrite", **kwargs):
|
|
412
412
|
raise NotImplementedError
|
|
413
413
|
|
|
414
414
|
async def _pipe(self, path, value=None, batch_size=None, **kwargs):
|
|
@@ -517,7 +517,7 @@ class AsyncFileSystem(AbstractFileSystem):
|
|
|
517
517
|
coros, batch_size=batch_size, nofiles=True, return_exceptions=True
|
|
518
518
|
)
|
|
519
519
|
|
|
520
|
-
async def _put_file(self, lpath, rpath, **kwargs):
|
|
520
|
+
async def _put_file(self, lpath, rpath, mode="overwrite", **kwargs):
|
|
521
521
|
raise NotImplementedError
|
|
522
522
|
|
|
523
523
|
async def _put(
|
|
@@ -816,11 +816,9 @@ class AsyncFileSystem(AbstractFileSystem):
|
|
|
816
816
|
p: info
|
|
817
817
|
for p, info in sorted(allpaths.items())
|
|
818
818
|
if pattern.match(
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
else p
|
|
823
|
-
)
|
|
819
|
+
p + "/"
|
|
820
|
+
if append_slash_to_dirname and info["type"] == "directory"
|
|
821
|
+
else p
|
|
824
822
|
)
|
|
825
823
|
}
|
|
826
824
|
|
|
@@ -8,6 +8,8 @@ import os
|
|
|
8
8
|
import threading
|
|
9
9
|
import warnings
|
|
10
10
|
from concurrent.futures import Future, ThreadPoolExecutor
|
|
11
|
+
from itertools import groupby
|
|
12
|
+
from operator import itemgetter
|
|
11
13
|
from typing import (
|
|
12
14
|
TYPE_CHECKING,
|
|
13
15
|
Any,
|
|
@@ -85,12 +87,7 @@ class BaseCache:
|
|
|
85
87
|
if self.hit_count == 0 and self.miss_count == 0:
|
|
86
88
|
# a cache that does nothing, this is for logs only
|
|
87
89
|
return ""
|
|
88
|
-
return " ,
|
|
89
|
-
self.name,
|
|
90
|
-
self.hit_count,
|
|
91
|
-
self.miss_count,
|
|
92
|
-
self.total_requested_bytes,
|
|
93
|
-
)
|
|
90
|
+
return f" , {self.name}: {self.hit_count} hits, {self.miss_count} misses, {self.total_requested_bytes} total requested bytes"
|
|
94
91
|
|
|
95
92
|
def __repr__(self) -> str:
|
|
96
93
|
# TODO: use rich for better formatting
|
|
@@ -161,21 +158,39 @@ class MMapCache(BaseCache):
|
|
|
161
158
|
return b""
|
|
162
159
|
start_block = start // self.blocksize
|
|
163
160
|
end_block = end // self.blocksize
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
161
|
+
block_range = range(start_block, end_block + 1)
|
|
162
|
+
# Determine which blocks need to be fetched. This sequence is sorted by construction.
|
|
163
|
+
need = (i for i in block_range if i not in self.blocks)
|
|
164
|
+
# Count the number of blocks already cached
|
|
165
|
+
self.hit_count += sum(1 for i in block_range if i in self.blocks)
|
|
166
|
+
|
|
167
|
+
# Consolidate needed blocks.
|
|
168
|
+
# Algorithm adapted from Python 2.x itertools documentation.
|
|
169
|
+
# We are grouping an enumerated sequence of blocks. By tracking the difference
|
|
170
|
+
# between an ascending range (provided by enumerate) and the needed block numbers
|
|
171
|
+
# we can detect when the block number skips values. The key computes this difference.
|
|
172
|
+
# Whenever the difference changes, we know that we have previously cached block(s),
|
|
173
|
+
# and a new group is started. In other words, this algorithm neatly groups
|
|
174
|
+
# runs of consecutive block numbers so they can be fetched together.
|
|
175
|
+
for _, _blocks in groupby(enumerate(need), key=lambda x: x[0] - x[1]):
|
|
176
|
+
# Extract the blocks from the enumerated sequence
|
|
177
|
+
_blocks = tuple(map(itemgetter(1), _blocks))
|
|
178
|
+
# Compute start of first block
|
|
179
|
+
sstart = _blocks[0] * self.blocksize
|
|
180
|
+
# Compute the end of the last block. Last block may not be full size.
|
|
181
|
+
send = min(_blocks[-1] * self.blocksize + self.blocksize, self.size)
|
|
182
|
+
|
|
183
|
+
# Fetch bytes (could be multiple consecutive blocks)
|
|
175
184
|
self.total_requested_bytes += send - sstart
|
|
176
|
-
logger.debug(
|
|
185
|
+
logger.debug(
|
|
186
|
+
f"MMap get blocks {_blocks[0]}-{_blocks[-1]} ({sstart}-{send})"
|
|
187
|
+
)
|
|
177
188
|
self.cache[sstart:send] = self.fetcher(sstart, send)
|
|
178
|
-
|
|
189
|
+
|
|
190
|
+
# Update set of cached blocks
|
|
191
|
+
self.blocks.update(_blocks)
|
|
192
|
+
# Update cache statistics with number of blocks we had to cache
|
|
193
|
+
self.miss_count += len(_blocks)
|
|
179
194
|
|
|
180
195
|
return self.cache[start:end]
|
|
181
196
|
|
|
@@ -329,12 +329,19 @@ def open_files(
|
|
|
329
329
|
|
|
330
330
|
|
|
331
331
|
def _un_chain(path, kwargs):
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
332
|
+
# Avoid a circular import
|
|
333
|
+
from fsspec.implementations.cached import CachingFileSystem
|
|
334
|
+
|
|
335
|
+
if "::" in path:
|
|
336
|
+
x = re.compile(".*[^a-z]+.*") # test for non protocol-like single word
|
|
337
|
+
bits = []
|
|
338
|
+
for p in path.split("::"):
|
|
339
|
+
if "://" in p or x.match(p):
|
|
340
|
+
bits.append(p)
|
|
341
|
+
else:
|
|
342
|
+
bits.append(p + "://")
|
|
343
|
+
else:
|
|
344
|
+
bits = [path]
|
|
338
345
|
# [[url, protocol, kwargs], ...]
|
|
339
346
|
out = []
|
|
340
347
|
previous_bit = None
|
|
@@ -351,10 +358,7 @@ def _un_chain(path, kwargs):
|
|
|
351
358
|
**kws,
|
|
352
359
|
)
|
|
353
360
|
bit = cls._strip_protocol(bit)
|
|
354
|
-
if (
|
|
355
|
-
protocol in {"blockcache", "filecache", "simplecache"}
|
|
356
|
-
and "target_protocol" not in kw
|
|
357
|
-
):
|
|
361
|
+
if "target_protocol" not in kw and issubclass(cls, CachingFileSystem):
|
|
358
362
|
bit = previous_bit
|
|
359
363
|
out.append((bit, protocol, kw))
|
|
360
364
|
previous_bit = bit
|
|
@@ -676,9 +680,7 @@ def get_fs_token_paths(
|
|
|
676
680
|
elif not isinstance(paths, list):
|
|
677
681
|
paths = list(paths)
|
|
678
682
|
else:
|
|
679
|
-
if "w" in mode and expand:
|
|
680
|
-
paths = _expand_paths(paths, name_function, num)
|
|
681
|
-
elif "x" in mode and expand:
|
|
683
|
+
if ("w" in mode or "x" in mode) and expand:
|
|
682
684
|
paths = _expand_paths(paths, name_function, num)
|
|
683
685
|
elif "*" in paths:
|
|
684
686
|
paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)]
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import functools
|
|
3
|
+
import inspect
|
|
4
|
+
|
|
5
|
+
from fsspec.asyn import AsyncFileSystem
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def async_wrapper(func, obj=None):
|
|
9
|
+
"""
|
|
10
|
+
Wraps a synchronous function to make it awaitable.
|
|
11
|
+
|
|
12
|
+
Parameters
|
|
13
|
+
----------
|
|
14
|
+
func : callable
|
|
15
|
+
The synchronous function to wrap.
|
|
16
|
+
obj : object, optional
|
|
17
|
+
The instance to bind the function to, if applicable.
|
|
18
|
+
|
|
19
|
+
Returns
|
|
20
|
+
-------
|
|
21
|
+
coroutine
|
|
22
|
+
An awaitable version of the function.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
@functools.wraps(func)
|
|
26
|
+
async def wrapper(*args, **kwargs):
|
|
27
|
+
return await asyncio.to_thread(func, *args, **kwargs)
|
|
28
|
+
|
|
29
|
+
return wrapper
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class AsyncFileSystemWrapper(AsyncFileSystem):
|
|
33
|
+
"""
|
|
34
|
+
A wrapper class to convert a synchronous filesystem into an asynchronous one.
|
|
35
|
+
|
|
36
|
+
This class takes an existing synchronous filesystem implementation and wraps all
|
|
37
|
+
its methods to provide an asynchronous interface.
|
|
38
|
+
|
|
39
|
+
Parameters
|
|
40
|
+
----------
|
|
41
|
+
sync_fs : AbstractFileSystem
|
|
42
|
+
The synchronous filesystem instance to wrap.
|
|
43
|
+
"""
|
|
44
|
+
|
|
45
|
+
def __init__(self, sync_fs, *args, **kwargs):
|
|
46
|
+
super().__init__(*args, **kwargs)
|
|
47
|
+
self.asynchronous = True
|
|
48
|
+
self.sync_fs = sync_fs
|
|
49
|
+
self.protocol = self.sync_fs.protocol
|
|
50
|
+
self._wrap_all_sync_methods()
|
|
51
|
+
|
|
52
|
+
@property
|
|
53
|
+
def fsid(self):
|
|
54
|
+
return f"async_{self.sync_fs.fsid}"
|
|
55
|
+
|
|
56
|
+
def _wrap_all_sync_methods(self):
|
|
57
|
+
"""
|
|
58
|
+
Wrap all synchronous methods of the underlying filesystem with asynchronous versions.
|
|
59
|
+
"""
|
|
60
|
+
excluded_methods = {"open"}
|
|
61
|
+
for method_name in dir(self.sync_fs):
|
|
62
|
+
if method_name.startswith("_") or method_name in excluded_methods:
|
|
63
|
+
continue
|
|
64
|
+
|
|
65
|
+
attr = inspect.getattr_static(self.sync_fs, method_name)
|
|
66
|
+
if isinstance(attr, property):
|
|
67
|
+
continue
|
|
68
|
+
|
|
69
|
+
method = getattr(self.sync_fs, method_name)
|
|
70
|
+
if callable(method) and not asyncio.iscoroutinefunction(method):
|
|
71
|
+
async_method = async_wrapper(method, obj=self)
|
|
72
|
+
setattr(self, f"_{method_name}", async_method)
|
|
73
|
+
|
|
74
|
+
@classmethod
|
|
75
|
+
def wrap_class(cls, sync_fs_class):
|
|
76
|
+
"""
|
|
77
|
+
Create a new class that can be used to instantiate an AsyncFileSystemWrapper
|
|
78
|
+
with lazy instantiation of the underlying synchronous filesystem.
|
|
79
|
+
|
|
80
|
+
Parameters
|
|
81
|
+
----------
|
|
82
|
+
sync_fs_class : type
|
|
83
|
+
The class of the synchronous filesystem to wrap.
|
|
84
|
+
|
|
85
|
+
Returns
|
|
86
|
+
-------
|
|
87
|
+
type
|
|
88
|
+
A new class that wraps the provided synchronous filesystem class.
|
|
89
|
+
"""
|
|
90
|
+
|
|
91
|
+
class GeneratedAsyncFileSystemWrapper(cls):
|
|
92
|
+
def __init__(self, *args, **kwargs):
|
|
93
|
+
sync_fs = sync_fs_class(*args, **kwargs)
|
|
94
|
+
super().__init__(sync_fs)
|
|
95
|
+
|
|
96
|
+
GeneratedAsyncFileSystemWrapper.__name__ = (
|
|
97
|
+
f"Async{sync_fs_class.__name__}Wrapper"
|
|
98
|
+
)
|
|
99
|
+
return GeneratedAsyncFileSystemWrapper
|
|
@@ -612,7 +612,7 @@ class WholeFileCacheFileSystem(CachingFileSystem):
|
|
|
612
612
|
**kwargs,
|
|
613
613
|
):
|
|
614
614
|
paths = self.expand_path(
|
|
615
|
-
path, recursive=recursive, maxdepth=kwargs.get("maxdepth"
|
|
615
|
+
path, recursive=recursive, maxdepth=kwargs.get("maxdepth")
|
|
616
616
|
)
|
|
617
617
|
getpaths = []
|
|
618
618
|
storepaths = []
|
|
@@ -412,9 +412,9 @@ class DatabricksFile(AbstractBufferedFile):
|
|
|
412
412
|
if block_size is None or block_size == "default":
|
|
413
413
|
block_size = self.DEFAULT_BLOCK_SIZE
|
|
414
414
|
|
|
415
|
-
assert (
|
|
416
|
-
|
|
417
|
-
)
|
|
415
|
+
assert block_size == self.DEFAULT_BLOCK_SIZE, (
|
|
416
|
+
f"Only the default block size is allowed, not {block_size}"
|
|
417
|
+
)
|
|
418
418
|
|
|
419
419
|
super().__init__(
|
|
420
420
|
fs,
|