fsspec 2025.5.1__tar.gz → 2025.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fsspec-2025.5.1 → fsspec-2025.9.0}/.github/workflows/main.yaml +2 -18
- {fsspec-2025.5.1 → fsspec-2025.9.0}/.pre-commit-config.yaml +2 -3
- {fsspec-2025.5.1 → fsspec-2025.9.0}/PKG-INFO +12 -33
- {fsspec-2025.5.1 → fsspec-2025.9.0}/README.md +8 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/docs/source/api.rst +5 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/docs/source/changelog.rst +47 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/_version.py +16 -3
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/asyn.py +3 -16
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/caching.py +2 -3
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/compression.py +17 -10
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/generic.py +4 -5
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/gui.py +2 -1
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/asyn_wrapper.py +11 -3
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/cache_metadata.py +3 -2
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/cached.py +49 -4
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/dbfs.py +31 -3
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/ftp.py +1 -9
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/git.py +1 -2
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/http.py +25 -15
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/local.py +5 -1
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/memory.py +1 -2
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/reference.py +1 -1
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/json.py +6 -10
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/registry.py +6 -2
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/utils.py +1 -3
- {fsspec-2025.5.1 → fsspec-2025.9.0}/pyproject.toml +7 -6
- fsspec-2025.5.1/ci/environment-typecheck.yml +0 -15
- {fsspec-2025.5.1 → fsspec-2025.9.0}/.codespellrc +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/.coveragerc +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/.gitattributes +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/.github/workflows/pypipublish.yaml +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/.gitignore +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/LICENSE +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/ci/environment-downstream.yml +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/ci/environment-friends.yml +0 -0
- /fsspec-2025.5.1/ci/environment-py38.yml → /fsspec-2025.9.0/ci/environment-linux.yml +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/ci/environment-win.yml +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/docs/Makefile +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/docs/README.md +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/docs/environment.yml +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/docs/make.bat +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/docs/source/_static/custom.css +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/docs/source/async.rst +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/docs/source/conf.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/docs/source/copying.rst +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/docs/source/developer.rst +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/docs/source/features.rst +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/docs/source/img/gui.png +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/docs/source/index.rst +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/docs/source/intro.rst +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/docs/source/usage.rst +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/__init__.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/archive.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/callbacks.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/config.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/conftest.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/core.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/dircache.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/exceptions.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/fuse.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/__init__.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/arrow.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/cache_mapper.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/dask.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/data.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/dirfs.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/gist.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/github.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/http_sync.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/jupyter.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/libarchive.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/sftp.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/smb.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/tar.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/webhdfs.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/implementations/zip.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/mapping.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/parquet.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/spec.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/tests/abstract/__init__.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/tests/abstract/common.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/tests/abstract/copy.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/tests/abstract/get.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/tests/abstract/mv.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/tests/abstract/open.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/tests/abstract/pipe.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/tests/abstract/put.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/fsspec/transaction.py +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/install_s3fs.sh +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/readthedocs.yml +0 -0
- {fsspec-2025.5.1 → fsspec-2025.9.0}/setup.cfg +0 -0
|
@@ -32,7 +32,7 @@ jobs:
|
|
|
32
32
|
- name: Setup conda
|
|
33
33
|
uses: conda-incubator/setup-miniconda@v3
|
|
34
34
|
with:
|
|
35
|
-
environment-file: ci/environment-py38.yml
|
|
35
|
+
environment-file: ci/environment-linux.yml
|
|
36
36
|
python-version: ${{ matrix.PY }}
|
|
37
37
|
|
|
38
38
|
- name: Run Tests
|
|
@@ -43,7 +43,7 @@ jobs:
|
|
|
43
43
|
|
|
44
44
|
win:
|
|
45
45
|
name: pytest-win
|
|
46
|
-
runs-on: windows-
|
|
46
|
+
runs-on: windows-2022
|
|
47
47
|
|
|
48
48
|
env:
|
|
49
49
|
CIRUN: true
|
|
@@ -75,22 +75,6 @@ jobs:
|
|
|
75
75
|
python-version: "3.11"
|
|
76
76
|
- uses: pre-commit/action@main
|
|
77
77
|
|
|
78
|
-
# typecheck:
|
|
79
|
-
# runs-on: ubuntu-latest
|
|
80
|
-
# steps:
|
|
81
|
-
# - name: Checkout
|
|
82
|
-
# uses: actions/checkout@v4
|
|
83
|
-
#
|
|
84
|
-
# - name: Setup conda
|
|
85
|
-
# uses: conda-incubator/setup-miniconda@v3
|
|
86
|
-
# with:
|
|
87
|
-
# environment-file: ci/environment-typecheck.yml
|
|
88
|
-
#
|
|
89
|
-
# - name: mypy
|
|
90
|
-
# shell: bash -l {0}
|
|
91
|
-
# run: |
|
|
92
|
-
# mypy fsspec
|
|
93
|
-
#
|
|
94
78
|
downstream:
|
|
95
79
|
name: downstream
|
|
96
80
|
runs-on: ubuntu-24.04
|
|
@@ -13,11 +13,10 @@ repos:
|
|
|
13
13
|
- id: check-json
|
|
14
14
|
- id: check-yaml
|
|
15
15
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
16
|
-
|
|
17
|
-
rev: v0.9.2
|
|
16
|
+
rev: v0.12.2
|
|
18
17
|
hooks:
|
|
19
18
|
# Run the linter.
|
|
20
|
-
- id: ruff
|
|
19
|
+
- id: ruff-check
|
|
21
20
|
args: [ --fix, "--show-fixes"]
|
|
22
21
|
- id: ruff-format
|
|
23
22
|
types_or: [python]
|
|
@@ -1,45 +1,16 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fsspec
|
|
3
|
-
Version: 2025.5.1
|
|
3
|
+
Version: 2025.9.0
|
|
4
4
|
Summary: File-system specification
|
|
5
5
|
Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
|
|
6
6
|
Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
|
|
7
7
|
Project-URL: Homepage, https://github.com/fsspec/filesystem_spec
|
|
8
8
|
Maintainer-email: Martin Durant <mdurant@anaconda.com>
|
|
9
|
-
License: BSD
|
|
10
|
-
|
|
11
|
-
Copyright (c) 2018, Martin Durant
|
|
12
|
-
All rights reserved.
|
|
13
|
-
|
|
14
|
-
Redistribution and use in source and binary forms, with or without
|
|
15
|
-
modification, are permitted provided that the following conditions are met:
|
|
16
|
-
|
|
17
|
-
* Redistributions of source code must retain the above copyright notice, this
|
|
18
|
-
list of conditions and the following disclaimer.
|
|
19
|
-
|
|
20
|
-
* Redistributions in binary form must reproduce the above copyright notice,
|
|
21
|
-
this list of conditions and the following disclaimer in the documentation
|
|
22
|
-
and/or other materials provided with the distribution.
|
|
23
|
-
|
|
24
|
-
* Neither the name of the copyright holder nor the names of its
|
|
25
|
-
contributors may be used to endorse or promote products derived from
|
|
26
|
-
this software without specific prior written permission.
|
|
27
|
-
|
|
28
|
-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
29
|
-
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
30
|
-
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
31
|
-
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
32
|
-
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
33
|
-
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
34
|
-
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
35
|
-
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
36
|
-
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
37
|
-
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
9
|
+
License-Expression: BSD-3-Clause
|
|
38
10
|
License-File: LICENSE
|
|
39
11
|
Keywords: file
|
|
40
12
|
Classifier: Development Status :: 4 - Beta
|
|
41
13
|
Classifier: Intended Audience :: Developers
|
|
42
|
-
Classifier: License :: OSI Approved :: BSD License
|
|
43
14
|
Classifier: Operating System :: OS Independent
|
|
44
15
|
Classifier: Programming Language :: Python :: 3.9
|
|
45
16
|
Classifier: Programming Language :: Python :: 3.10
|
|
@@ -58,7 +29,7 @@ Requires-Dist: dask; extra == 'dask'
|
|
|
58
29
|
Requires-Dist: distributed; extra == 'dask'
|
|
59
30
|
Provides-Extra: dev
|
|
60
31
|
Requires-Dist: pre-commit; extra == 'dev'
|
|
61
|
-
Requires-Dist: ruff; extra == 'dev'
|
|
32
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
62
33
|
Provides-Extra: doc
|
|
63
34
|
Requires-Dist: numpydoc; extra == 'doc'
|
|
64
35
|
Requires-Dist: sphinx; extra == 'doc'
|
|
@@ -172,7 +143,7 @@ Requires-Dist: smbprotocol; extra == 'test-full'
|
|
|
172
143
|
Requires-Dist: tqdm; extra == 'test-full'
|
|
173
144
|
Requires-Dist: urllib3; extra == 'test-full'
|
|
174
145
|
Requires-Dist: zarr; extra == 'test-full'
|
|
175
|
-
Requires-Dist: zstandard; extra == 'test-full'
|
|
146
|
+
Requires-Dist: zstandard; (python_version < '3.14') and extra == 'test-full'
|
|
176
147
|
Provides-Extra: tqdm
|
|
177
148
|
Requires-Dist: tqdm; extra == 'tqdm'
|
|
178
149
|
Description-Content-Type: text/markdown
|
|
@@ -275,3 +246,11 @@ filesystem_spec repository to setup pre-commit hooks. ``black`` will now be run
|
|
|
275
246
|
before you commit, reformatting any changed files. You can format without
|
|
276
247
|
committing via ``pre-commit run`` or skip these checks with ``git commit
|
|
277
248
|
--no-verify``.
|
|
249
|
+
|
|
250
|
+
## Support
|
|
251
|
+
|
|
252
|
+
Work on this repository is supported in part by:
|
|
253
|
+
|
|
254
|
+
"Anaconda, Inc. - Advancing AI through open source."
|
|
255
|
+
|
|
256
|
+
<a href="https://anaconda.com/"><img src="https://camo.githubusercontent.com/b8555ef2222598ed37ce38ac86955febbd25de7619931bb7dd3c58432181d3b6/68747470733a2f2f626565776172652e6f72672f636f6d6d756e6974792f6d656d626572732f616e61636f6e64612f616e61636f6e64612d6c617267652e706e67" alt="anaconda logo" width="40%"/></a>
|
|
@@ -96,3 +96,11 @@ filesystem_spec repository to setup pre-commit hooks. ``black`` will now be run
|
|
|
96
96
|
before you commit, reformatting any changed files. You can format without
|
|
97
97
|
committing via ``pre-commit run`` or skip these checks with ``git commit
|
|
98
98
|
--no-verify``.
|
|
99
|
+
|
|
100
|
+
## Support
|
|
101
|
+
|
|
102
|
+
Work on this repository is supported in part by:
|
|
103
|
+
|
|
104
|
+
"Anaconda, Inc. - Advancing AI through open source."
|
|
105
|
+
|
|
106
|
+
<a href="https://anaconda.com/"><img src="https://camo.githubusercontent.com/b8555ef2222598ed37ce38ac86955febbd25de7619931bb7dd3c58432181d3b6/68747470733a2f2f626565776172652e6f72672f636f6d6d756e6974792f6d656d626572732f616e61636f6e64612f616e61636f6e64612d6c617267652e706e67" alt="anaconda logo" width="40%"/></a>
|
|
@@ -235,6 +235,7 @@ documentation carefully before using any particular package.
|
|
|
235
235
|
- `irods`_ for access to iRODS servers, with protocol "irods://"
|
|
236
236
|
- `lakefs`_ for lakeFS data lakes, with protocol "lakefs://"
|
|
237
237
|
- `morefs`_ for `OverlayFileSystem`, `DictFileSystem`, and others
|
|
238
|
+
- `obstore`_: zero-dependency access to Amazon S3, Google Cloud Storage, and Azure Blob Storage using the underlying Rust `object_store`_ library, with protocols "s3://", "gs://", and "abfs://".
|
|
238
239
|
- `ocifs`_ for access to Oracle Cloud Object Storage, with protocol "oci://"
|
|
239
240
|
- `ocilake`_ for OCI Data Lake storage
|
|
240
241
|
- `ossfs`_ for Alibaba Cloud (Aliyun) Object Storage System (OSS)
|
|
@@ -250,6 +251,7 @@ documentation carefully before using any particular package.
|
|
|
250
251
|
- `wandbfsspec`_ to access Weights & Biases (experimental)
|
|
251
252
|
- `webdav4`_ for WebDAV, with protocol "webdav://" or "dav://"
|
|
252
253
|
- `xrootd`_ for xrootd, with protocol "root://"
|
|
254
|
+
- `msgraphfs`_ for Microsoft storage (ie Sharepoint) using the drive API through Microsoft Graph, with protocol "msgd://"
|
|
253
255
|
|
|
254
256
|
.. _abfs: https://github.com/dask/adlfs
|
|
255
257
|
.. _adl: https://github.com/dask/adlfs
|
|
@@ -270,6 +272,8 @@ documentation carefully before using any particular package.
|
|
|
270
272
|
.. _irods: https://github.com/xwcl/irods_fsspec
|
|
271
273
|
.. _lakefs: https://github.com/aai-institute/lakefs-spec
|
|
272
274
|
.. _morefs: https://github.com/iterative/morefs
|
|
275
|
+
.. _object_store: https://docs.rs/object_store/latest/object_store/
|
|
276
|
+
.. _obstore: https://developmentseed.org/obstore/latest/
|
|
273
277
|
.. _ocifs: https://ocifs.readthedocs.io/en/latest/
|
|
274
278
|
.. _ocilake: https://github.com/oracle/ocifs
|
|
275
279
|
.. _ossfs: https://github.com/fsspec/ossfs
|
|
@@ -284,6 +288,7 @@ documentation carefully before using any particular package.
|
|
|
284
288
|
.. _wandbfsspec: https://github.com/alvarobartt/wandbfsspec
|
|
285
289
|
.. _webdav4: https://github.com/skshetry/webdav4
|
|
286
290
|
.. _xrootd: https://github.com/CoffeaTeam/fsspec-xrootd
|
|
291
|
+
.. _msgraphfs: https://github.com/acsone/msgraphfs
|
|
287
292
|
|
|
288
293
|
.. _readbuffering:
|
|
289
294
|
|
|
@@ -1,6 +1,53 @@
|
|
|
1
1
|
Changelog
|
|
2
2
|
=========
|
|
3
3
|
|
|
4
|
+
2025.9.0
|
|
5
|
+
--------
|
|
6
|
+
|
|
7
|
+
Enhancements
|
|
8
|
+
|
|
9
|
+
- include Last_modified info from HTTP headers to info (#1909)
|
|
10
|
+
- add optional semaphore to async-wrapper (#1908)
|
|
11
|
+
|
|
12
|
+
Fixes
|
|
13
|
+
|
|
14
|
+
- ensure cachingFSs show correct protocol (#1897)
|
|
15
|
+
- fix simplecache cat_ranges (#1892)
|
|
16
|
+
|
|
17
|
+
Other
|
|
18
|
+
|
|
19
|
+
- Style (#1894)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
2025.7.0
|
|
23
|
+
--------
|
|
24
|
+
|
|
25
|
+
Enhancements
|
|
26
|
+
|
|
27
|
+
- only download HTML mime for http listing (#1889)
|
|
28
|
+
- add tos:// to registry (#1878)
|
|
29
|
+
|
|
30
|
+
Fixes
|
|
31
|
+
|
|
32
|
+
- use st_birthtime in localFS, if available (#1883)
|
|
33
|
+
- allow cat_* in simplecache (#1881)
|
|
34
|
+
- remove deprecated asyncio use (#1862)
|
|
35
|
+
- create event loop if it doesn't exist (#1857)
|
|
36
|
+
|
|
37
|
+
Other
|
|
38
|
+
|
|
39
|
+
- remove references to py38 (#1888)
|
|
40
|
+
- ruff updates (#1887, 1864)
|
|
41
|
+
- github rate limits in CI (#1879, 1877)
|
|
42
|
+
- acknowledge Anaconda support (#1876)
|
|
43
|
+
- add obstore to known implementations (#1875)
|
|
44
|
+
- add Microsoft storage to known implementations (#1853)
|
|
45
|
+
- use builtins zstd for py3.14 (#1874)
|
|
46
|
+
- gdrivefs -> gdrive_fsspec (#1858)
|
|
47
|
+
- windows version in CI (#1855)
|
|
48
|
+
- error message typo (#1854)
|
|
49
|
+
|
|
50
|
+
|
|
4
51
|
2025.5.1
|
|
5
52
|
--------
|
|
6
53
|
|
|
@@ -1,7 +1,14 @@
|
|
|
1
1
|
# file generated by setuptools-scm
|
|
2
2
|
# don't change, don't track in version control
|
|
3
3
|
|
|
4
|
-
__all__ = [
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
5
12
|
|
|
6
13
|
TYPE_CHECKING = False
|
|
7
14
|
if TYPE_CHECKING:
|
|
@@ -9,13 +16,19 @@ if TYPE_CHECKING:
|
|
|
9
16
|
from typing import Union
|
|
10
17
|
|
|
11
18
|
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
12
20
|
else:
|
|
13
21
|
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
14
23
|
|
|
15
24
|
version: str
|
|
16
25
|
__version__: str
|
|
17
26
|
__version_tuple__: VERSION_TUPLE
|
|
18
27
|
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
19
30
|
|
|
20
|
-
__version__ = version = '2025.5.1'
|
|
21
|
-
__version_tuple__ = version_tuple = (2025, 5, 1)
|
|
31
|
+
__version__ = version = '2025.9.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (2025, 9, 0)
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|
|
@@ -7,9 +7,9 @@ import numbers
|
|
|
7
7
|
import os
|
|
8
8
|
import re
|
|
9
9
|
import threading
|
|
10
|
-
from
|
|
10
|
+
from collections.abc import Iterable
|
|
11
11
|
from glob import has_magic
|
|
12
|
-
from typing import TYPE_CHECKING
|
|
12
|
+
from typing import TYPE_CHECKING
|
|
13
13
|
|
|
14
14
|
from .callbacks import DEFAULT_CALLBACK
|
|
15
15
|
from .exceptions import FSTimeoutError
|
|
@@ -120,18 +120,6 @@ def sync_wrapper(func, obj=None):
|
|
|
120
120
|
return wrapper
|
|
121
121
|
|
|
122
122
|
|
|
123
|
-
@contextmanager
|
|
124
|
-
def _selector_policy():
|
|
125
|
-
original_policy = asyncio.get_event_loop_policy()
|
|
126
|
-
try:
|
|
127
|
-
if os.name == "nt" and hasattr(asyncio, "WindowsSelectorEventLoopPolicy"):
|
|
128
|
-
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
|
|
129
|
-
|
|
130
|
-
yield
|
|
131
|
-
finally:
|
|
132
|
-
asyncio.set_event_loop_policy(original_policy)
|
|
133
|
-
|
|
134
|
-
|
|
135
123
|
def get_loop():
|
|
136
124
|
"""Create or return the default fsspec IO loop
|
|
137
125
|
|
|
@@ -142,8 +130,7 @@ def get_loop():
|
|
|
142
130
|
# repeat the check just in case the loop got filled between the
|
|
143
131
|
# previous two calls from another thread
|
|
144
132
|
if loop[0] is None:
|
|
145
|
-
|
|
146
|
-
loop[0] = asyncio.new_event_loop()
|
|
133
|
+
loop[0] = asyncio.new_event_loop()
|
|
147
134
|
th = threading.Thread(target=loop[0].run_forever, name="fsspecIO")
|
|
148
135
|
th.daemon = True
|
|
149
136
|
th.start()
|
|
@@ -7,6 +7,7 @@ import math
|
|
|
7
7
|
import os
|
|
8
8
|
import threading
|
|
9
9
|
import warnings
|
|
10
|
+
from collections import OrderedDict
|
|
10
11
|
from concurrent.futures import Future, ThreadPoolExecutor
|
|
11
12
|
from itertools import groupby
|
|
12
13
|
from operator import itemgetter
|
|
@@ -17,8 +18,6 @@ from typing import (
|
|
|
17
18
|
ClassVar,
|
|
18
19
|
Generic,
|
|
19
20
|
NamedTuple,
|
|
20
|
-
Optional,
|
|
21
|
-
OrderedDict,
|
|
22
21
|
TypeVar,
|
|
23
22
|
)
|
|
24
23
|
|
|
@@ -629,7 +628,7 @@ class KnownPartsOfAFile(BaseCache):
|
|
|
629
628
|
blocksize: int,
|
|
630
629
|
fetcher: Fetcher,
|
|
631
630
|
size: int,
|
|
632
|
-
data:
|
|
631
|
+
data: dict[tuple[int, int], bytes] | None = None,
|
|
633
632
|
strict: bool = True,
|
|
634
633
|
**_: Any,
|
|
635
634
|
):
|
|
@@ -155,19 +155,26 @@ except ImportError:
|
|
|
155
155
|
pass
|
|
156
156
|
|
|
157
157
|
try:
|
|
158
|
-
|
|
158
|
+
# zstd in the standard library for python >= 3.14
|
|
159
|
+
from compression.zstd import ZstdFile
|
|
159
160
|
|
|
160
|
-
|
|
161
|
-
if "r" in mode:
|
|
162
|
-
cctx = zstd.ZstdDecompressor()
|
|
163
|
-
return cctx.stream_reader(infile)
|
|
164
|
-
else:
|
|
165
|
-
cctx = zstd.ZstdCompressor(level=10)
|
|
166
|
-
return cctx.stream_writer(infile)
|
|
161
|
+
register_compression("zstd", ZstdFile, "zst")
|
|
167
162
|
|
|
168
|
-
register_compression("zstd", zstandard_file, "zst")
|
|
169
163
|
except ImportError:
|
|
170
|
-
|
|
164
|
+
try:
|
|
165
|
+
import zstandard as zstd
|
|
166
|
+
|
|
167
|
+
def zstandard_file(infile, mode="rb"):
|
|
168
|
+
if "r" in mode:
|
|
169
|
+
cctx = zstd.ZstdDecompressor()
|
|
170
|
+
return cctx.stream_reader(infile)
|
|
171
|
+
else:
|
|
172
|
+
cctx = zstd.ZstdCompressor(level=10)
|
|
173
|
+
return cctx.stream_writer(infile)
|
|
174
|
+
|
|
175
|
+
register_compression("zstd", zstandard_file, "zst")
|
|
176
|
+
except ImportError:
|
|
177
|
+
pass
|
|
171
178
|
|
|
172
179
|
|
|
173
180
|
def available_compressions():
|
|
@@ -5,7 +5,6 @@ import logging
|
|
|
5
5
|
import os
|
|
6
6
|
import shutil
|
|
7
7
|
import uuid
|
|
8
|
-
from typing import Optional
|
|
9
8
|
|
|
10
9
|
from .asyn import AsyncFileSystem, _run_coros_in_chunks, sync_wrapper
|
|
11
10
|
from .callbacks import DEFAULT_CALLBACK
|
|
@@ -289,7 +288,7 @@ class GenericFileSystem(AsyncFileSystem):
|
|
|
289
288
|
url2,
|
|
290
289
|
blocksize=2**20,
|
|
291
290
|
callback=DEFAULT_CALLBACK,
|
|
292
|
-
tempdir:
|
|
291
|
+
tempdir: str | None = None,
|
|
293
292
|
**kwargs,
|
|
294
293
|
):
|
|
295
294
|
fs = _resolve_fs(url, self.method)
|
|
@@ -319,9 +318,9 @@ class GenericFileSystem(AsyncFileSystem):
|
|
|
319
318
|
path2: list[str],
|
|
320
319
|
recursive: bool = False,
|
|
321
320
|
on_error: str = "ignore",
|
|
322
|
-
maxdepth:
|
|
323
|
-
batch_size:
|
|
324
|
-
tempdir:
|
|
321
|
+
maxdepth: int | None = None,
|
|
322
|
+
batch_size: int | None = None,
|
|
323
|
+
tempdir: str | None = None,
|
|
325
324
|
**kwargs,
|
|
326
325
|
):
|
|
327
326
|
# TODO: special case for one FS being local, which can use get/put
|
|
@@ -6,7 +6,7 @@ import fsspec
|
|
|
6
6
|
from fsspec.asyn import AsyncFileSystem, running_async
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
def async_wrapper(func, obj=None):
|
|
9
|
+
def async_wrapper(func, obj=None, semaphore=None):
|
|
10
10
|
"""
|
|
11
11
|
Wraps a synchronous function to make it awaitable.
|
|
12
12
|
|
|
@@ -16,6 +16,8 @@ def async_wrapper(func, obj=None):
|
|
|
16
16
|
The synchronous function to wrap.
|
|
17
17
|
obj : object, optional
|
|
18
18
|
The instance to bind the function to, if applicable.
|
|
19
|
+
semaphore : asyncio.Semaphore, optional
|
|
20
|
+
A semaphore to limit concurrent calls.
|
|
19
21
|
|
|
20
22
|
Returns
|
|
21
23
|
-------
|
|
@@ -25,6 +27,9 @@ def async_wrapper(func, obj=None):
|
|
|
25
27
|
|
|
26
28
|
@functools.wraps(func)
|
|
27
29
|
async def wrapper(*args, **kwargs):
|
|
30
|
+
if semaphore:
|
|
31
|
+
async with semaphore:
|
|
32
|
+
return await asyncio.to_thread(func, *args, **kwargs)
|
|
28
33
|
return await asyncio.to_thread(func, *args, **kwargs)
|
|
29
34
|
|
|
30
35
|
return wrapper
|
|
@@ -52,6 +57,8 @@ class AsyncFileSystemWrapper(AsyncFileSystem):
|
|
|
52
57
|
asynchronous=None,
|
|
53
58
|
target_protocol=None,
|
|
54
59
|
target_options=None,
|
|
60
|
+
semaphore=None,
|
|
61
|
+
max_concurrent_tasks=None,
|
|
55
62
|
**kwargs,
|
|
56
63
|
):
|
|
57
64
|
if asynchronous is None:
|
|
@@ -62,6 +69,7 @@ class AsyncFileSystemWrapper(AsyncFileSystem):
|
|
|
62
69
|
else:
|
|
63
70
|
self.sync_fs = fsspec.filesystem(target_protocol, **target_options)
|
|
64
71
|
self.protocol = self.sync_fs.protocol
|
|
72
|
+
self.semaphore = semaphore
|
|
65
73
|
self._wrap_all_sync_methods()
|
|
66
74
|
|
|
67
75
|
@property
|
|
@@ -82,8 +90,8 @@ class AsyncFileSystemWrapper(AsyncFileSystem):
|
|
|
82
90
|
continue
|
|
83
91
|
|
|
84
92
|
method = getattr(self.sync_fs, method_name)
|
|
85
|
-
if callable(method) and not
|
|
86
|
-
async_method = async_wrapper(method, obj=self)
|
|
93
|
+
if callable(method) and not inspect.iscoroutinefunction(method):
|
|
94
|
+
async_method = async_wrapper(method, obj=self, semaphore=self.semaphore)
|
|
87
95
|
setattr(self, f"_{method_name}", async_method)
|
|
88
96
|
|
|
89
97
|
@classmethod
|
|
@@ -14,13 +14,14 @@ except ImportError:
|
|
|
14
14
|
import json
|
|
15
15
|
|
|
16
16
|
if TYPE_CHECKING:
|
|
17
|
-
from
|
|
17
|
+
from collections.abc import Iterator
|
|
18
|
+
from typing import Any, Literal
|
|
18
19
|
|
|
19
20
|
from typing_extensions import TypeAlias
|
|
20
21
|
|
|
21
22
|
from .cached import CachingFileSystem
|
|
22
23
|
|
|
23
|
-
Detail: TypeAlias =
|
|
24
|
+
Detail: TypeAlias = dict[str, Any]
|
|
24
25
|
|
|
25
26
|
|
|
26
27
|
class CacheMetadata:
|
|
@@ -16,6 +16,7 @@ from fsspec.core import BaseCache, MMapCache
|
|
|
16
16
|
from fsspec.exceptions import BlocksizeMismatchError
|
|
17
17
|
from fsspec.implementations.cache_mapper import create_cache_mapper
|
|
18
18
|
from fsspec.implementations.cache_metadata import CacheMetadata
|
|
19
|
+
from fsspec.implementations.local import LocalFileSystem
|
|
19
20
|
from fsspec.spec import AbstractBufferedFile
|
|
20
21
|
from fsspec.transaction import Transaction
|
|
21
22
|
from fsspec.utils import infer_compression
|
|
@@ -338,7 +339,7 @@ class CachingFileSystem(AbstractFileSystem):
|
|
|
338
339
|
# explicitly submitting the size to the open call will avoid extra
|
|
339
340
|
# operations when opening. This is particularly relevant
|
|
340
341
|
# for any file that is read over a network, e.g. S3.
|
|
341
|
-
size = detail.get("size"
|
|
342
|
+
size = detail.get("size")
|
|
342
343
|
|
|
343
344
|
# call target filesystems open
|
|
344
345
|
self._mkcache()
|
|
@@ -433,7 +434,9 @@ class CachingFileSystem(AbstractFileSystem):
|
|
|
433
434
|
"open",
|
|
434
435
|
"cat",
|
|
435
436
|
"cat_file",
|
|
437
|
+
"_cat_file",
|
|
436
438
|
"cat_ranges",
|
|
439
|
+
"_cat_ranges",
|
|
437
440
|
"get",
|
|
438
441
|
"read_block",
|
|
439
442
|
"tail",
|
|
@@ -475,7 +478,7 @@ class CachingFileSystem(AbstractFileSystem):
|
|
|
475
478
|
if item in ["transaction"]:
|
|
476
479
|
# property
|
|
477
480
|
return type(self).transaction.__get__(self)
|
|
478
|
-
if item in
|
|
481
|
+
if item in {"_cache", "transaction_type", "protocol"}:
|
|
479
482
|
# class attributes
|
|
480
483
|
return getattr(type(self), item)
|
|
481
484
|
if item == "__class__":
|
|
@@ -835,14 +838,56 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
|
|
|
835
838
|
else:
|
|
836
839
|
raise ValueError("path must be str or dict")
|
|
837
840
|
|
|
841
|
+
async def _cat_file(self, path, start=None, end=None, **kwargs):
|
|
842
|
+
logger.debug("async cat_file %s", path)
|
|
843
|
+
path = self._strip_protocol(path)
|
|
844
|
+
sha = self._mapper(path)
|
|
845
|
+
fn = self._check_file(path)
|
|
846
|
+
|
|
847
|
+
if not fn:
|
|
848
|
+
fn = os.path.join(self.storage[-1], sha)
|
|
849
|
+
await self.fs._get_file(path, fn, **kwargs)
|
|
850
|
+
|
|
851
|
+
with open(fn, "rb") as f: # noqa ASYNC230
|
|
852
|
+
if start:
|
|
853
|
+
f.seek(start)
|
|
854
|
+
size = -1 if end is None else end - f.tell()
|
|
855
|
+
return f.read(size)
|
|
856
|
+
|
|
857
|
+
async def _cat_ranges(
|
|
858
|
+
self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
|
|
859
|
+
):
|
|
860
|
+
logger.debug("async cat ranges %s", paths)
|
|
861
|
+
lpaths = []
|
|
862
|
+
rset = set()
|
|
863
|
+
download = []
|
|
864
|
+
rpaths = []
|
|
865
|
+
for p in paths:
|
|
866
|
+
fn = self._check_file(p)
|
|
867
|
+
if fn is None and p not in rset:
|
|
868
|
+
sha = self._mapper(p)
|
|
869
|
+
fn = os.path.join(self.storage[-1], sha)
|
|
870
|
+
download.append(fn)
|
|
871
|
+
rset.add(p)
|
|
872
|
+
rpaths.append(p)
|
|
873
|
+
lpaths.append(fn)
|
|
874
|
+
if download:
|
|
875
|
+
await self.fs._get(rpaths, download, on_error=on_error)
|
|
876
|
+
|
|
877
|
+
return LocalFileSystem().cat_ranges(
|
|
878
|
+
lpaths, starts, ends, max_gap=max_gap, on_error=on_error, **kwargs
|
|
879
|
+
)
|
|
880
|
+
|
|
838
881
|
def cat_ranges(
|
|
839
882
|
self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
|
|
840
883
|
):
|
|
884
|
+
logger.debug("cat ranges %s", paths)
|
|
841
885
|
lpaths = [self._check_file(p) for p in paths]
|
|
842
886
|
rpaths = [p for l, p in zip(lpaths, paths) if l is False]
|
|
843
887
|
lpaths = [l for l, p in zip(lpaths, paths) if l is False]
|
|
844
888
|
self.fs.get(rpaths, lpaths)
|
|
845
|
-
|
|
889
|
+
paths = [self._check_file(p) for p in paths]
|
|
890
|
+
return LocalFileSystem().cat_ranges(
|
|
846
891
|
paths, starts, ends, max_gap=max_gap, on_error=on_error, **kwargs
|
|
847
892
|
)
|
|
848
893
|
|
|
@@ -940,7 +985,7 @@ class LocalTempFile:
|
|
|
940
985
|
|
|
941
986
|
def commit(self):
|
|
942
987
|
self.fs.put(self.fn, self.path, **self.kwargs)
|
|
943
|
-
# we do not delete local copy
|
|
988
|
+
# we do not delete the local copy, it's still in the cache.
|
|
944
989
|
|
|
945
990
|
@property
|
|
946
991
|
def name(self):
|
|
@@ -1,9 +1,11 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import base64
|
|
2
4
|
import urllib
|
|
3
5
|
|
|
4
6
|
import requests
|
|
5
|
-
import requests.exceptions
|
|
6
7
|
from requests.adapters import HTTPAdapter, Retry
|
|
8
|
+
from typing_extensions import override
|
|
7
9
|
|
|
8
10
|
from fsspec import AbstractFileSystem
|
|
9
11
|
from fsspec.spec import AbstractBufferedFile
|
|
@@ -57,6 +59,24 @@ class DatabricksFileSystem(AbstractFileSystem):
|
|
|
57
59
|
|
|
58
60
|
super().__init__(**kwargs)
|
|
59
61
|
|
|
62
|
+
@override
|
|
63
|
+
def _ls_from_cache(self, path) -> list[dict[str, str | int]] | None:
|
|
64
|
+
"""Check cache for listing
|
|
65
|
+
|
|
66
|
+
Returns listing, if found (may be empty list for a directory that
|
|
67
|
+
exists but contains nothing), None if not in cache.
|
|
68
|
+
"""
|
|
69
|
+
self.dircache.pop(path.rstrip("/"), None)
|
|
70
|
+
|
|
71
|
+
parent = self._parent(path)
|
|
72
|
+
if parent in self.dircache:
|
|
73
|
+
for entry in self.dircache[parent]:
|
|
74
|
+
if entry["name"] == path.rstrip("/"):
|
|
75
|
+
if entry["type"] != "directory":
|
|
76
|
+
return [entry]
|
|
77
|
+
return []
|
|
78
|
+
raise FileNotFoundError(path)
|
|
79
|
+
|
|
60
80
|
def ls(self, path, detail=True, **kwargs):
|
|
61
81
|
"""
|
|
62
82
|
List the contents of the given path.
|
|
@@ -70,7 +90,15 @@ class DatabricksFileSystem(AbstractFileSystem):
|
|
|
70
90
|
but also additional information on file sizes
|
|
71
91
|
and types.
|
|
72
92
|
"""
|
|
73
|
-
|
|
93
|
+
try:
|
|
94
|
+
out = self._ls_from_cache(path)
|
|
95
|
+
except FileNotFoundError:
|
|
96
|
+
# This happens if the `path`'s parent was cached, but `path` is not
|
|
97
|
+
# there. This suggests that `path` is new since the parent was
|
|
98
|
+
# cached. Attempt to invalidate parent's cache before continuing.
|
|
99
|
+
self.dircache.pop(self._parent(path), None)
|
|
100
|
+
out = None
|
|
101
|
+
|
|
74
102
|
if not out:
|
|
75
103
|
try:
|
|
76
104
|
r = self._send_to_api(
|
|
@@ -460,7 +488,7 @@ class DatabricksFile(AbstractBufferedFile):
|
|
|
460
488
|
return return_buffer
|
|
461
489
|
|
|
462
490
|
def _to_sized_blocks(self, length, start=0):
|
|
463
|
-
"""Helper function to split a range from 0 to total_length into
|
|
491
|
+
"""Helper function to split a range from 0 to total_length into blocksizes"""
|
|
464
492
|
end = start + length
|
|
465
493
|
for data_chunk in range(start, end, self.blocksize):
|
|
466
494
|
data_start = data_chunk
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
-
import sys
|
|
3
2
|
import uuid
|
|
4
|
-
import warnings
|
|
5
3
|
from ftplib import FTP, FTP_TLS, Error, error_perm
|
|
6
4
|
from typing import Any
|
|
7
5
|
|
|
@@ -81,13 +79,7 @@ class FTPFileSystem(AbstractFileSystem):
|
|
|
81
79
|
ftp_cls = FTP_TLS
|
|
82
80
|
else:
|
|
83
81
|
ftp_cls = FTP
|
|
84
|
-
|
|
85
|
-
self.ftp = ftp_cls(timeout=self.timeout, encoding=self.encoding)
|
|
86
|
-
elif self.encoding:
|
|
87
|
-
warnings.warn("`encoding` not supported for python<3.9, ignoring")
|
|
88
|
-
self.ftp = ftp_cls(timeout=self.timeout)
|
|
89
|
-
else:
|
|
90
|
-
self.ftp = ftp_cls(timeout=self.timeout)
|
|
82
|
+
self.ftp = ftp_cls(timeout=self.timeout, encoding=self.encoding)
|
|
91
83
|
self.ftp.connect(self.host, self.port)
|
|
92
84
|
self.ftp.login(*self.cred)
|
|
93
85
|
|
|
@@ -62,8 +62,7 @@ class GitFileSystem(AbstractFileSystem):
|
|
|
62
62
|
|
|
63
63
|
@staticmethod
|
|
64
64
|
def _get_kwargs_from_urls(path):
|
|
65
|
-
|
|
66
|
-
path = path[6:]
|
|
65
|
+
path = path.removeprefix("git://")
|
|
67
66
|
out = {}
|
|
68
67
|
if ":" in path:
|
|
69
68
|
out["path"], path = path.split(":", 1)
|
|
@@ -80,12 +80,12 @@ class HTTPFileSystem(AsyncFileSystem):
|
|
|
80
80
|
https://docs.aiohttp.org/en/stable/client_reference.html
|
|
81
81
|
For example, ``{'auth': aiohttp.BasicAuth('user', 'pass')}``
|
|
82
82
|
get_client: Callable[..., aiohttp.ClientSession]
|
|
83
|
-
A callable which takes keyword arguments and constructs
|
|
84
|
-
an aiohttp.ClientSession.
|
|
83
|
+
A callable, which takes keyword arguments and constructs
|
|
84
|
+
an aiohttp.ClientSession. Its state will be managed by
|
|
85
85
|
the HTTPFileSystem class.
|
|
86
86
|
storage_options: key-value
|
|
87
87
|
Any other parameters passed on to requests
|
|
88
|
-
cache_type, cache_options: defaults used in open
|
|
88
|
+
cache_type, cache_options: defaults used in open()
|
|
89
89
|
"""
|
|
90
90
|
super().__init__(self, asynchronous=asynchronous, loop=loop, **storage_options)
|
|
91
91
|
self.block_size = block_size if block_size is not None else DEFAULT_BLOCK_SIZE
|
|
@@ -158,14 +158,24 @@ class HTTPFileSystem(AsyncFileSystem):
|
|
|
158
158
|
session = await self.set_session()
|
|
159
159
|
async with session.get(self.encode_url(url), **self.kwargs) as r:
|
|
160
160
|
self._raise_not_found_for_status(r, url)
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
161
|
+
|
|
162
|
+
if "Content-Type" in r.headers:
|
|
163
|
+
mimetype = r.headers["Content-Type"].partition(";")[0]
|
|
164
|
+
else:
|
|
165
|
+
mimetype = None
|
|
166
|
+
|
|
167
|
+
if mimetype in ("text/html", None):
|
|
168
|
+
try:
|
|
169
|
+
text = await r.text(errors="ignore")
|
|
170
|
+
if self.simple_links:
|
|
171
|
+
links = ex2.findall(text) + [u[2] for u in ex.findall(text)]
|
|
172
|
+
else:
|
|
173
|
+
links = [u[2] for u in ex.findall(text)]
|
|
174
|
+
except UnicodeDecodeError:
|
|
175
|
+
links = [] # binary, not HTML
|
|
176
|
+
else:
|
|
177
|
+
links = []
|
|
178
|
+
|
|
169
179
|
out = set()
|
|
170
180
|
parts = urlparse(url)
|
|
171
181
|
for l in links:
|
|
@@ -254,7 +264,7 @@ class HTTPFileSystem(AsyncFileSystem):
|
|
|
254
264
|
if isfilelike(lpath):
|
|
255
265
|
outfile = lpath
|
|
256
266
|
else:
|
|
257
|
-
outfile = open(lpath, "wb") # noqa:
|
|
267
|
+
outfile = open(lpath, "wb") # noqa: ASYNC230
|
|
258
268
|
|
|
259
269
|
try:
|
|
260
270
|
chunk = True
|
|
@@ -286,7 +296,7 @@ class HTTPFileSystem(AsyncFileSystem):
|
|
|
286
296
|
context = nullcontext(lpath)
|
|
287
297
|
use_seek = False # might not support seeking
|
|
288
298
|
else:
|
|
289
|
-
context = open(lpath, "rb") # noqa:
|
|
299
|
+
context = open(lpath, "rb") # noqa: ASYNC230
|
|
290
300
|
use_seek = True
|
|
291
301
|
|
|
292
302
|
with context as f:
|
|
@@ -812,7 +822,7 @@ async def get_range(session, url, start, end, file=None, **kwargs):
|
|
|
812
822
|
async with r:
|
|
813
823
|
out = await r.read()
|
|
814
824
|
if file:
|
|
815
|
-
with open(file, "r+b") as f: # noqa:
|
|
825
|
+
with open(file, "r+b") as f: # noqa: ASYNC230
|
|
816
826
|
f.seek(start)
|
|
817
827
|
f.write(out)
|
|
818
828
|
else:
|
|
@@ -863,7 +873,7 @@ async def _file_info(url, session, size_policy="head", **kwargs):
|
|
|
863
873
|
|
|
864
874
|
info["url"] = str(r.url)
|
|
865
875
|
|
|
866
|
-
for checksum_field in ["ETag", "Content-MD5", "Digest"]:
|
|
876
|
+
for checksum_field in ["ETag", "Content-MD5", "Digest", "Last-Modified"]:
|
|
867
877
|
if r.headers.get(checksum_field):
|
|
868
878
|
info[checksum_field] = r.headers[checksum_field]
|
|
869
879
|
|
|
@@ -109,11 +109,15 @@ class LocalFileSystem(AbstractFileSystem):
|
|
|
109
109
|
t = "file"
|
|
110
110
|
else:
|
|
111
111
|
t = "other"
|
|
112
|
+
|
|
113
|
+
# Check for the 'st_birthtime' attribute, which is not always present; fallback to st_ctime
|
|
114
|
+
created_time = getattr(out, "st_birthtime", out.st_ctime)
|
|
115
|
+
|
|
112
116
|
result = {
|
|
113
117
|
"name": path,
|
|
114
118
|
"size": size,
|
|
115
119
|
"type": t,
|
|
116
|
-
"created":
|
|
120
|
+
"created": created_time,
|
|
117
121
|
"islink": link,
|
|
118
122
|
}
|
|
119
123
|
for field in ["mode", "uid", "gid", "mtime", "ino", "nlink"]:
|
|
@@ -34,8 +34,7 @@ class MemoryFileSystem(AbstractFileSystem):
|
|
|
34
34
|
else:
|
|
35
35
|
path = stringify_path(path)
|
|
36
36
|
|
|
37
|
-
|
|
38
|
-
path = path[len("memory://") :]
|
|
37
|
+
path = path.removeprefix("memory://")
|
|
39
38
|
if "::" in path or "://" in path:
|
|
40
39
|
return path.rstrip("/")
|
|
41
40
|
path = path.lstrip("/").rstrip("/")
|
|
@@ -768,7 +768,7 @@ class ReferenceFileSystem(AsyncFileSystem):
|
|
|
768
768
|
self.fss[k] = AsyncFileSystemWrapper(f, asynchronous=self.asynchronous)
|
|
769
769
|
elif self.asynchronous ^ f.asynchronous:
|
|
770
770
|
raise ValueError(
|
|
771
|
-
"Reference-FS's target filesystem must have same value"
|
|
771
|
+
"Reference-FS's target filesystem must have same value "
|
|
772
772
|
"of asynchronous"
|
|
773
773
|
)
|
|
774
774
|
|
|
@@ -1,16 +1,12 @@
|
|
|
1
1
|
import json
|
|
2
|
+
from collections.abc import Mapping, Sequence
|
|
2
3
|
from contextlib import suppress
|
|
3
4
|
from pathlib import PurePath
|
|
4
5
|
from typing import (
|
|
5
6
|
Any,
|
|
6
7
|
Callable,
|
|
7
8
|
ClassVar,
|
|
8
|
-
Dict,
|
|
9
|
-
List,
|
|
10
|
-
Mapping,
|
|
11
9
|
Optional,
|
|
12
|
-
Sequence,
|
|
13
|
-
Tuple,
|
|
14
10
|
)
|
|
15
11
|
|
|
16
12
|
from .registry import _import_class, get_filesystem_class
|
|
@@ -49,12 +45,12 @@ class FilesystemJSONDecoder(json.JSONDecoder):
|
|
|
49
45
|
def __init__(
|
|
50
46
|
self,
|
|
51
47
|
*,
|
|
52
|
-
object_hook: Optional[Callable[[
|
|
48
|
+
object_hook: Optional[Callable[[dict[str, Any]], Any]] = None,
|
|
53
49
|
parse_float: Optional[Callable[[str], Any]] = None,
|
|
54
50
|
parse_int: Optional[Callable[[str], Any]] = None,
|
|
55
51
|
parse_constant: Optional[Callable[[str], Any]] = None,
|
|
56
52
|
strict: bool = True,
|
|
57
|
-
object_pairs_hook: Optional[Callable[[
|
|
53
|
+
object_pairs_hook: Optional[Callable[[list[tuple[str, Any]]], Any]] = None,
|
|
58
54
|
) -> None:
|
|
59
55
|
self.original_object_hook = object_hook
|
|
60
56
|
|
|
@@ -68,7 +64,7 @@ class FilesystemJSONDecoder(json.JSONDecoder):
|
|
|
68
64
|
)
|
|
69
65
|
|
|
70
66
|
@classmethod
|
|
71
|
-
def try_resolve_path_cls(cls, dct:
|
|
67
|
+
def try_resolve_path_cls(cls, dct: dict[str, Any]):
|
|
72
68
|
with suppress(Exception):
|
|
73
69
|
fqp = dct["cls"]
|
|
74
70
|
|
|
@@ -80,7 +76,7 @@ class FilesystemJSONDecoder(json.JSONDecoder):
|
|
|
80
76
|
return None
|
|
81
77
|
|
|
82
78
|
@classmethod
|
|
83
|
-
def try_resolve_fs_cls(cls, dct:
|
|
79
|
+
def try_resolve_fs_cls(cls, dct: dict[str, Any]):
|
|
84
80
|
with suppress(Exception):
|
|
85
81
|
if "cls" in dct:
|
|
86
82
|
try:
|
|
@@ -95,7 +91,7 @@ class FilesystemJSONDecoder(json.JSONDecoder):
|
|
|
95
91
|
|
|
96
92
|
return None
|
|
97
93
|
|
|
98
|
-
def custom_object_hook(self, dct:
|
|
94
|
+
def custom_object_hook(self, dct: dict[str, Any]):
|
|
99
95
|
if "cls" in dct:
|
|
100
96
|
if (obj_cls := self.try_resolve_fs_cls(dct)) is not None:
|
|
101
97
|
return AbstractFileSystem.from_dict(dct)
|
|
@@ -118,8 +118,8 @@ known_implementations = {
|
|
|
118
118
|
"err": "Please install gcsfs to access Google Storage",
|
|
119
119
|
},
|
|
120
120
|
"gdrive": {
|
|
121
|
-
"class": "
|
|
122
|
-
"err": "Please install
|
|
121
|
+
"class": "gdrive_fsspec.GoogleDriveFileSystem",
|
|
122
|
+
"err": "Please install gdrive_fs for access to Google Drive",
|
|
123
123
|
},
|
|
124
124
|
"generic": {"class": "fsspec.generic.GenericFileSystem"},
|
|
125
125
|
"gist": {
|
|
@@ -213,6 +213,10 @@ known_implementations = {
|
|
|
213
213
|
"err": 'SFTPFileSystem requires "paramiko" to be installed',
|
|
214
214
|
},
|
|
215
215
|
"tar": {"class": "fsspec.implementations.tar.TarFileSystem"},
|
|
216
|
+
"tos": {
|
|
217
|
+
"class": "tosfs.TosFileSystem",
|
|
218
|
+
"err": "Install tosfs to access ByteDance volcano engine Tinder Object Storage",
|
|
219
|
+
},
|
|
216
220
|
"tosfs": {
|
|
217
221
|
"class": "tosfs.TosFileSystem",
|
|
218
222
|
"err": "Install tosfs to access ByteDance volcano engine Tinder Object Storage",
|
|
@@ -7,6 +7,7 @@ import os
|
|
|
7
7
|
import re
|
|
8
8
|
import sys
|
|
9
9
|
import tempfile
|
|
10
|
+
from collections.abc import Iterable, Iterator, Sequence
|
|
10
11
|
from functools import partial
|
|
11
12
|
from hashlib import md5
|
|
12
13
|
from importlib.metadata import version
|
|
@@ -15,9 +16,6 @@ from typing import (
|
|
|
15
16
|
TYPE_CHECKING,
|
|
16
17
|
Any,
|
|
17
18
|
Callable,
|
|
18
|
-
Iterable,
|
|
19
|
-
Iterator,
|
|
20
|
-
Sequence,
|
|
21
19
|
TypeVar,
|
|
22
20
|
)
|
|
23
21
|
from urllib.parse import urlsplit
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
[build-system]
|
|
2
|
-
requires = ["hatchling", "hatch-vcs"]
|
|
2
|
+
requires = ["hatchling>=1.27.0", "hatch-vcs"]
|
|
3
3
|
build-backend = "hatchling.build"
|
|
4
4
|
|
|
5
5
|
[project]
|
|
@@ -7,14 +7,14 @@ name = "fsspec"
|
|
|
7
7
|
dynamic = ["version"]
|
|
8
8
|
description = "File-system specification"
|
|
9
9
|
readme = "README.md"
|
|
10
|
-
license =
|
|
10
|
+
license = "BSD-3-Clause"
|
|
11
|
+
license-files = ["LICENSE"]
|
|
11
12
|
requires-python = ">=3.9"
|
|
12
13
|
maintainers = [{ name = "Martin Durant", email = "mdurant@anaconda.com" }]
|
|
13
14
|
keywords = ["file"]
|
|
14
15
|
classifiers = [
|
|
15
16
|
"Development Status :: 4 - Beta",
|
|
16
17
|
"Intended Audience :: Developers",
|
|
17
|
-
"License :: OSI Approved :: BSD License",
|
|
18
18
|
"Operating System :: OS Independent",
|
|
19
19
|
"Programming Language :: Python :: 3.9",
|
|
20
20
|
"Programming Language :: Python :: 3.10",
|
|
@@ -28,7 +28,7 @@ abfs = ["adlfs"]
|
|
|
28
28
|
adl = ["adlfs"]
|
|
29
29
|
arrow = ["pyarrow >= 1"]
|
|
30
30
|
dask = ["dask", "distributed"]
|
|
31
|
-
dev = ["ruff", "pre-commit"]
|
|
31
|
+
dev = ["ruff >= 0.5", "pre-commit"]
|
|
32
32
|
doc = ["sphinx", "numpydoc", "sphinx-design", "sphinx-rtd-theme", "yarl"]
|
|
33
33
|
dropbox = ["dropbox", "dropboxdrivefs", "requests"]
|
|
34
34
|
entrypoints = []
|
|
@@ -117,7 +117,7 @@ test_full = [
|
|
|
117
117
|
'tqdm',
|
|
118
118
|
'urllib3',
|
|
119
119
|
'zarr',
|
|
120
|
-
'zstandard',
|
|
120
|
+
'zstandard; python_version < "3.14"',
|
|
121
121
|
]
|
|
122
122
|
test_downstream = [
|
|
123
123
|
"dask[dataframe,test]",
|
|
@@ -144,7 +144,6 @@ version-file = "fsspec/_version.py"
|
|
|
144
144
|
exclude = ["**/tests/*", "!**/tests/abstract/"]
|
|
145
145
|
|
|
146
146
|
[tool.ruff]
|
|
147
|
-
target-version = "py38"
|
|
148
147
|
exclude = [".tox", "build", "docs/source/conf.py", "fsspec/_version"]
|
|
149
148
|
line-length = 88
|
|
150
149
|
|
|
@@ -209,6 +208,7 @@ ignore = [
|
|
|
209
208
|
# Fix these codes later
|
|
210
209
|
"G004",
|
|
211
210
|
"PERF203",
|
|
211
|
+
"PLC0415",
|
|
212
212
|
"UP007",
|
|
213
213
|
"UP011",
|
|
214
214
|
"UP015",
|
|
@@ -219,6 +219,7 @@ ignore = [
|
|
|
219
219
|
"SIM114",
|
|
220
220
|
"SIM115",
|
|
221
221
|
"SIM117",
|
|
222
|
+
"TC003",
|
|
222
223
|
# https://github.com/astral-sh/ruff/issues/7871
|
|
223
224
|
"UP038",
|
|
224
225
|
# https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|