fsspec 2025.3.1__tar.gz → 2025.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fsspec-2025.3.1 → fsspec-2025.5.0}/PKG-INFO +2 -3
- {fsspec-2025.3.1 → fsspec-2025.5.0}/docs/source/api.rst +3 -1
- {fsspec-2025.3.1 → fsspec-2025.5.0}/docs/source/changelog.rst +30 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/__init__.py +4 -2
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/_version.py +2 -2
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/caching.py +1 -1
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/generic.py +45 -61
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/asyn_wrapper.py +15 -4
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/cached.py +12 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/dbfs.py +3 -2
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/github.py +68 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/local.py +35 -2
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/registry.py +7 -3
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/spec.py +35 -7
- {fsspec-2025.3.1 → fsspec-2025.5.0}/pyproject.toml +2 -3
- fsspec-2025.3.1/Untitled.ipynb +0 -6
- fsspec-2025.3.1/correct_permissions.bin +0 -1
- {fsspec-2025.3.1 → fsspec-2025.5.0}/.codespellrc +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/.coveragerc +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/.gitattributes +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/.github/workflows/main.yaml +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/.github/workflows/pypipublish.yaml +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/.gitignore +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/.pre-commit-config.yaml +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/LICENSE +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/README.md +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/ci/environment-downstream.yml +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/ci/environment-friends.yml +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/ci/environment-py38.yml +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/ci/environment-typecheck.yml +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/ci/environment-win.yml +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/docs/Makefile +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/docs/README.md +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/docs/environment.yml +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/docs/make.bat +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/docs/source/_static/custom.css +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/docs/source/async.rst +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/docs/source/conf.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/docs/source/copying.rst +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/docs/source/developer.rst +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/docs/source/features.rst +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/docs/source/img/gui.png +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/docs/source/index.rst +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/docs/source/intro.rst +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/docs/source/usage.rst +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/archive.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/asyn.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/callbacks.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/compression.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/config.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/conftest.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/core.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/dircache.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/exceptions.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/fuse.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/gui.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/__init__.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/arrow.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/cache_mapper.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/cache_metadata.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/dask.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/data.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/dirfs.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/ftp.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/git.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/http.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/http_sync.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/jupyter.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/libarchive.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/memory.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/reference.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/sftp.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/smb.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/tar.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/webhdfs.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/implementations/zip.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/json.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/mapping.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/parquet.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/tests/abstract/__init__.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/tests/abstract/common.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/tests/abstract/copy.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/tests/abstract/get.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/tests/abstract/mv.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/tests/abstract/open.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/tests/abstract/pipe.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/tests/abstract/put.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/transaction.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/fsspec/utils.py +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/install_s3fs.sh +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/readthedocs.yml +0 -0
- {fsspec-2025.3.1 → fsspec-2025.5.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fsspec
|
|
3
|
-
Version: 2025.3.1
|
|
3
|
+
Version: 2025.5.0
|
|
4
4
|
Summary: File-system specification
|
|
5
5
|
Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
|
|
6
6
|
Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
|
|
@@ -41,13 +41,12 @@ Classifier: Development Status :: 4 - Beta
|
|
|
41
41
|
Classifier: Intended Audience :: Developers
|
|
42
42
|
Classifier: License :: OSI Approved :: BSD License
|
|
43
43
|
Classifier: Operating System :: OS Independent
|
|
44
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
45
44
|
Classifier: Programming Language :: Python :: 3.9
|
|
46
45
|
Classifier: Programming Language :: Python :: 3.10
|
|
47
46
|
Classifier: Programming Language :: Python :: 3.11
|
|
48
47
|
Classifier: Programming Language :: Python :: 3.12
|
|
49
48
|
Classifier: Programming Language :: Python :: 3.13
|
|
50
|
-
Requires-Python: >=3.8
|
|
49
|
+
Requires-Python: >=3.9
|
|
51
50
|
Provides-Extra: abfs
|
|
52
51
|
Requires-Dist: adlfs; extra == 'abfs'
|
|
53
52
|
Provides-Extra: adl
|
|
@@ -221,7 +221,7 @@ documentation carefully before using any particular package.
|
|
|
221
221
|
- `dropbox`_ for access to dropbox shares, with protocol "dropbox://"
|
|
222
222
|
- `dvc`_ to access DVC/Git repository as a filesystem
|
|
223
223
|
- `fsspec-encrypted`_ for transparent encryption on top of other fsspec filesystems.
|
|
224
|
-
- `gcsfs`_ for Google Cloud Storage, with protocol "gcs://"
|
|
224
|
+
- `gcsfs`_ for Google Cloud Storage, with protocol "gs://" or "gcs://"
|
|
225
225
|
- `gdrive`_ to access Google Drive and shares (experimental)
|
|
226
226
|
- `git`_ to access Git repositories
|
|
227
227
|
- `huggingface_hub`_ to access the Hugging Face Hub filesystem, with protocol "hf://"
|
|
@@ -237,6 +237,7 @@ documentation carefully before using any particular package.
|
|
|
237
237
|
- `p9fs`_ for 9P (Plan 9 Filesystem Protocol) servers
|
|
238
238
|
- `PyAthena`_ for S3 access to Amazon Athena, with protocol "s3://" or "s3a://"
|
|
239
239
|
- `PyDrive2`_ for Google Drive access
|
|
240
|
+
- `fsspec-proxy`_ for "pyscript:" URLs via a proxy server
|
|
240
241
|
- `s3fs`_ for Amazon S3 and other compatible stores, with protocol "s3://"
|
|
241
242
|
- `sshfs`_ for access to SSH servers, with protocol "ssh://" or "sftp://"
|
|
242
243
|
- `swiftspec`_ for OpenStack SWIFT, with protocol "swift://"
|
|
@@ -254,6 +255,7 @@ documentation carefully before using any particular package.
|
|
|
254
255
|
.. _dropbox: https://github.com/fsspec/dropboxdrivefs
|
|
255
256
|
.. _dvc: https://github.com/iterative/dvc
|
|
256
257
|
.. _fsspec-encrypted: https://github.com/thevgergroup/fsspec-encrypted
|
|
258
|
+
.. _fsspec-proxy: https://github.com/fsspec/fsspec-proxy
|
|
257
259
|
.. _gcsfs: https://gcsfs.readthedocs.io/en/latest/
|
|
258
260
|
.. _gdrive: https://github.com/fsspec/gdrivefs
|
|
259
261
|
.. _git: https://github.com/iterative/scmrepo
|
|
@@ -1,6 +1,36 @@
|
|
|
1
1
|
Changelog
|
|
2
2
|
=========
|
|
3
3
|
|
|
4
|
+
2025.5.0
|
|
5
|
+
--------
|
|
6
|
+
|
|
7
|
+
Enhancements
|
|
8
|
+
|
|
9
|
+
- add rm in github: (#1839)
|
|
10
|
+
- cachingFS performance, don't ls every time (#1833)
|
|
11
|
+
- pyscript: protocol and optional entrypoint (#1828)
|
|
12
|
+
|
|
13
|
+
Fixes
|
|
14
|
+
|
|
15
|
+
- improve cp in generic (#1835)
|
|
16
|
+
- fix ls in dbfs (#1834)
|
|
17
|
+
- fix parquet tests for pyarrow 20 (#1831)
|
|
18
|
+
- fix crosslink for local tempfile (#1829)
|
|
19
|
+
- keep permissions in local transaction (#1826)
|
|
20
|
+
- signature compatibility in local cp() (#1820)
|
|
21
|
+
|
|
22
|
+
Other
|
|
23
|
+
|
|
24
|
+
- add "gs" at mention of gcsfs in docs (#1840)
|
|
25
|
+
- lint style (#1837)
|
|
26
|
+
- glob docstring to reflect reality (#1825)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
2025.3.2
|
|
30
|
+
--------
|
|
31
|
+
|
|
32
|
+
- drop support for py3.8
|
|
33
|
+
|
|
4
34
|
2025.3.1
|
|
5
35
|
--------
|
|
6
36
|
|
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
from importlib.metadata import entry_points
|
|
2
|
-
|
|
3
1
|
from . import caching
|
|
4
2
|
from ._version import __version__ # noqa: F401
|
|
5
3
|
from .callbacks import Callback
|
|
@@ -38,6 +36,10 @@ __all__ = [
|
|
|
38
36
|
|
|
39
37
|
|
|
40
38
|
def process_entries():
|
|
39
|
+
try:
|
|
40
|
+
from importlib.metadata import entry_points
|
|
41
|
+
except ImportError:
|
|
42
|
+
return
|
|
41
43
|
if entry_points is not None:
|
|
42
44
|
try:
|
|
43
45
|
eps = entry_points()
|
|
@@ -17,5 +17,5 @@ __version__: str
|
|
|
17
17
|
__version_tuple__: VERSION_TUPLE
|
|
18
18
|
version_tuple: VERSION_TUPLE
|
|
19
19
|
|
|
20
|
-
__version__ = version = '2025.3.1'
|
|
21
|
-
__version_tuple__ = version_tuple = (2025, 3, 1)
|
|
20
|
+
__version__ = version = '2025.5.0'
|
|
21
|
+
__version_tuple__ = version_tuple = (2025, 5, 0)
|
|
@@ -37,7 +37,7 @@ T = TypeVar("T")
|
|
|
37
37
|
logger = logging.getLogger("fsspec")
|
|
38
38
|
|
|
39
39
|
Fetcher = Callable[[int, int], bytes] # Maps (start, end) to bytes
|
|
40
|
-
MultiFetcher = Callable[list[
|
|
40
|
+
MultiFetcher = Callable[[list[int, int]], bytes] # Maps [(start, end)] to bytes
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
class BaseCache:
|
|
@@ -16,15 +16,13 @@ logger = logging.getLogger("fsspec.generic")
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
def set_generic_fs(protocol, **storage_options):
|
|
19
|
+
"""Populate the dict used for method=="generic" lookups"""
|
|
19
20
|
_generic_fs[protocol] = filesystem(protocol, **storage_options)
|
|
20
21
|
|
|
21
22
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def _resolve_fs(url, method=None, protocol=None, storage_options=None):
|
|
23
|
+
def _resolve_fs(url, method, protocol=None, storage_options=None):
|
|
26
24
|
"""Pick instance of backend FS"""
|
|
27
|
-
|
|
25
|
+
url = url[0] if isinstance(url, (list, tuple)) else url
|
|
28
26
|
protocol = protocol or split_protocol(url)[0]
|
|
29
27
|
storage_options = storage_options or {}
|
|
30
28
|
if method == "default":
|
|
@@ -159,7 +157,7 @@ class GenericFileSystem(AsyncFileSystem):
|
|
|
159
157
|
|
|
160
158
|
protocol = "generic" # there is no real reason to ever use a protocol with this FS
|
|
161
159
|
|
|
162
|
-
def __init__(self, default_method="default", **kwargs):
|
|
160
|
+
def __init__(self, default_method="default", storage_options=None, **kwargs):
|
|
163
161
|
"""
|
|
164
162
|
|
|
165
163
|
Parameters
|
|
@@ -171,22 +169,25 @@ class GenericFileSystem(AsyncFileSystem):
|
|
|
171
169
|
configured via the config system
|
|
172
170
|
- "generic": takes instances from the `_generic_fs` dict in this module,
|
|
173
171
|
which you must populate before use. Keys are by protocol
|
|
172
|
+
- "options": expects storage_options, a dict mapping protocol to
|
|
173
|
+
kwargs to use when constructing the filesystem
|
|
174
174
|
- "current": takes the most recently instantiated version of each FS
|
|
175
175
|
"""
|
|
176
176
|
self.method = default_method
|
|
177
|
+
self.st_opts = storage_options
|
|
177
178
|
super().__init__(**kwargs)
|
|
178
179
|
|
|
179
180
|
def _parent(self, path):
|
|
180
|
-
fs = _resolve_fs(path, self.method)
|
|
181
|
+
fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
|
|
181
182
|
return fs.unstrip_protocol(fs._parent(path))
|
|
182
183
|
|
|
183
184
|
def _strip_protocol(self, path):
|
|
184
185
|
# normalization only
|
|
185
|
-
fs = _resolve_fs(path, self.method)
|
|
186
|
+
fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
|
|
186
187
|
return fs.unstrip_protocol(fs._strip_protocol(path))
|
|
187
188
|
|
|
188
189
|
async def _find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
|
|
189
|
-
fs = _resolve_fs(path, self.method)
|
|
190
|
+
fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
|
|
190
191
|
if fs.async_impl:
|
|
191
192
|
out = await fs._find(
|
|
192
193
|
path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
|
|
@@ -251,7 +252,7 @@ class GenericFileSystem(AsyncFileSystem):
|
|
|
251
252
|
value,
|
|
252
253
|
**kwargs,
|
|
253
254
|
):
|
|
254
|
-
fs = _resolve_fs(path, self.method)
|
|
255
|
+
fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
|
|
255
256
|
if fs.async_impl:
|
|
256
257
|
return await fs._pipe_file(path, value, **kwargs)
|
|
257
258
|
else:
|
|
@@ -269,7 +270,7 @@ class GenericFileSystem(AsyncFileSystem):
|
|
|
269
270
|
|
|
270
271
|
async def _makedirs(self, path, exist_ok=False):
|
|
271
272
|
logger.debug("Make dir %s", path)
|
|
272
|
-
fs = _resolve_fs(path, self.method)
|
|
273
|
+
fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
|
|
273
274
|
if fs.async_impl:
|
|
274
275
|
await fs._makedirs(path, exist_ok=exist_ok)
|
|
275
276
|
else:
|
|
@@ -288,6 +289,7 @@ class GenericFileSystem(AsyncFileSystem):
|
|
|
288
289
|
url2,
|
|
289
290
|
blocksize=2**20,
|
|
290
291
|
callback=DEFAULT_CALLBACK,
|
|
292
|
+
tempdir: Optional[str] = None,
|
|
291
293
|
**kwargs,
|
|
292
294
|
):
|
|
293
295
|
fs = _resolve_fs(url, self.method)
|
|
@@ -295,35 +297,10 @@ class GenericFileSystem(AsyncFileSystem):
|
|
|
295
297
|
if fs is fs2:
|
|
296
298
|
# pure remote
|
|
297
299
|
if fs.async_impl:
|
|
298
|
-
return await fs.
|
|
300
|
+
return await fs._copy(url, url2, **kwargs)
|
|
299
301
|
else:
|
|
300
|
-
return fs.
|
|
301
|
-
|
|
302
|
-
try:
|
|
303
|
-
f1 = (
|
|
304
|
-
await fs.open_async(url, "rb")
|
|
305
|
-
if hasattr(fs, "open_async")
|
|
306
|
-
else fs.open(url, "rb", **kw)
|
|
307
|
-
)
|
|
308
|
-
callback.set_size(await maybe_await(f1.size))
|
|
309
|
-
f2 = (
|
|
310
|
-
await fs2.open_async(url2, "wb")
|
|
311
|
-
if hasattr(fs2, "open_async")
|
|
312
|
-
else fs2.open(url2, "wb", **kw)
|
|
313
|
-
)
|
|
314
|
-
while f1.size is None or f2.tell() < f1.size:
|
|
315
|
-
data = await maybe_await(f1.read(blocksize))
|
|
316
|
-
if f1.size is None and not data:
|
|
317
|
-
break
|
|
318
|
-
await maybe_await(f2.write(data))
|
|
319
|
-
callback.absolute_update(f2.tell())
|
|
320
|
-
finally:
|
|
321
|
-
try:
|
|
322
|
-
await maybe_await(f2.close())
|
|
323
|
-
await maybe_await(f1.close())
|
|
324
|
-
except NameError:
|
|
325
|
-
# fail while opening f1 or f2
|
|
326
|
-
pass
|
|
302
|
+
return fs.copy(url, url2, **kwargs)
|
|
303
|
+
await copy_file_op(fs, [url], fs2, [url2], tempdir, 1, on_error="raise")
|
|
327
304
|
|
|
328
305
|
async def _make_many_dirs(self, urls, exist_ok=True):
|
|
329
306
|
fs = _resolve_fs(urls[0], self.method)
|
|
@@ -347,17 +324,22 @@ class GenericFileSystem(AsyncFileSystem):
|
|
|
347
324
|
tempdir: Optional[str] = None,
|
|
348
325
|
**kwargs,
|
|
349
326
|
):
|
|
327
|
+
# TODO: special case for one FS being local, which can use get/put
|
|
328
|
+
# TODO: special case for one being memFS, which can use cat/pipe
|
|
350
329
|
if recursive:
|
|
351
|
-
raise NotImplementedError
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
330
|
+
raise NotImplementedError("Please use fsspec.generic.rsync")
|
|
331
|
+
path1 = [path1] if isinstance(path1, str) else path1
|
|
332
|
+
path2 = [path2] if isinstance(path2, str) else path2
|
|
333
|
+
|
|
334
|
+
fs = _resolve_fs(path1, self.method)
|
|
335
|
+
fs2 = _resolve_fs(path2, self.method)
|
|
336
|
+
|
|
355
337
|
if fs is fs2:
|
|
356
|
-
# pure remote
|
|
357
338
|
if fs.async_impl:
|
|
358
339
|
return await fs._copy(path1, path2, **kwargs)
|
|
359
340
|
else:
|
|
360
341
|
return fs.copy(path1, path2, **kwargs)
|
|
342
|
+
|
|
361
343
|
await copy_file_op(
|
|
362
344
|
fs, path1, fs2, path2, tempdir, batch_size, on_error=on_error
|
|
363
345
|
)
|
|
@@ -377,31 +359,33 @@ async def copy_file_op(
|
|
|
377
359
|
fs2,
|
|
378
360
|
u2,
|
|
379
361
|
os.path.join(tempdir, uuid.uuid4().hex),
|
|
380
|
-
on_error=on_error,
|
|
381
362
|
)
|
|
382
363
|
for u1, u2 in zip(url1, url2)
|
|
383
364
|
]
|
|
384
|
-
await _run_coros_in_chunks(
|
|
365
|
+
out = await _run_coros_in_chunks(
|
|
366
|
+
coros, batch_size=batch_size, return_exceptions=True
|
|
367
|
+
)
|
|
385
368
|
finally:
|
|
386
369
|
shutil.rmtree(tempdir)
|
|
370
|
+
if on_error == "return":
|
|
371
|
+
return out
|
|
372
|
+
elif on_error == "raise":
|
|
373
|
+
for o in out:
|
|
374
|
+
if isinstance(o, Exception):
|
|
375
|
+
raise o
|
|
387
376
|
|
|
388
377
|
|
|
389
378
|
async def _copy_file_op(fs1, url1, fs2, url2, local, on_error="ignore"):
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
fs2.put_file(local, url2)
|
|
401
|
-
os.unlink(local)
|
|
402
|
-
logger.debug("Copy %s -> %s; done", url1, url2)
|
|
403
|
-
except ex as e:
|
|
404
|
-
logger.debug("ignoring cp exception for %s: %s", url1, e)
|
|
379
|
+
if fs1.async_impl:
|
|
380
|
+
await fs1._get_file(url1, local)
|
|
381
|
+
else:
|
|
382
|
+
fs1.get_file(url1, local)
|
|
383
|
+
if fs2.async_impl:
|
|
384
|
+
await fs2._put_file(local, url2)
|
|
385
|
+
else:
|
|
386
|
+
fs2.put_file(local, url2)
|
|
387
|
+
os.unlink(local)
|
|
388
|
+
logger.debug("Copy %s -> %s; done", url1, url2)
|
|
405
389
|
|
|
406
390
|
|
|
407
391
|
async def maybe_await(cor):
|
|
@@ -2,6 +2,7 @@ import asyncio
|
|
|
2
2
|
import functools
|
|
3
3
|
import inspect
|
|
4
4
|
|
|
5
|
+
import fsspec
|
|
5
6
|
from fsspec.asyn import AsyncFileSystem, running_async
|
|
6
7
|
|
|
7
8
|
|
|
@@ -42,14 +43,24 @@ class AsyncFileSystemWrapper(AsyncFileSystem):
|
|
|
42
43
|
The synchronous filesystem instance to wrap.
|
|
43
44
|
"""
|
|
44
45
|
|
|
45
|
-
protocol = "async_wrapper"
|
|
46
|
+
protocol = "asyncwrapper", "async_wrapper"
|
|
46
47
|
cachable = False
|
|
47
48
|
|
|
48
|
-
def __init__(
|
|
49
|
+
def __init__(
|
|
50
|
+
self,
|
|
51
|
+
fs=None,
|
|
52
|
+
asynchronous=None,
|
|
53
|
+
target_protocol=None,
|
|
54
|
+
target_options=None,
|
|
55
|
+
**kwargs,
|
|
56
|
+
):
|
|
49
57
|
if asynchronous is None:
|
|
50
58
|
asynchronous = running_async()
|
|
51
|
-
super().__init__(
|
|
52
|
-
|
|
59
|
+
super().__init__(asynchronous=asynchronous, **kwargs)
|
|
60
|
+
if fs is not None:
|
|
61
|
+
self.sync_fs = fs
|
|
62
|
+
else:
|
|
63
|
+
self.sync_fs = fsspec.filesystem(target_protocol, **target_options)
|
|
53
64
|
self.protocol = self.sync_fs.protocol
|
|
54
65
|
self._wrap_all_sync_methods()
|
|
55
66
|
|
|
@@ -335,6 +335,11 @@ class CachingFileSystem(AbstractFileSystem):
|
|
|
335
335
|
self._metadata.update_file(path, detail)
|
|
336
336
|
logger.debug("Creating local sparse file for %s", path)
|
|
337
337
|
|
|
338
|
+
# explicitly submitting the size to the open call will avoid extra
|
|
339
|
+
# operations when opening. This is particularly relevant
|
|
340
|
+
# for any file that is read over a network, e.g. S3.
|
|
341
|
+
size = detail.get("size", None)
|
|
342
|
+
|
|
338
343
|
# call target filesystems open
|
|
339
344
|
self._mkcache()
|
|
340
345
|
f = self.fs._open(
|
|
@@ -344,8 +349,15 @@ class CachingFileSystem(AbstractFileSystem):
|
|
|
344
349
|
autocommit=autocommit,
|
|
345
350
|
cache_options=cache_options,
|
|
346
351
|
cache_type="none",
|
|
352
|
+
size=size,
|
|
347
353
|
**kwargs,
|
|
348
354
|
)
|
|
355
|
+
|
|
356
|
+
# set size if not already set
|
|
357
|
+
if size is None:
|
|
358
|
+
detail["size"] = f.size
|
|
359
|
+
self._metadata.update_file(path, detail)
|
|
360
|
+
|
|
349
361
|
if self.compression:
|
|
350
362
|
comp = (
|
|
351
363
|
infer_compression(path)
|
|
@@ -14,12 +14,13 @@ class DatabricksException(Exception):
|
|
|
14
14
|
Helper class for exceptions raised in this module.
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
|
-
def __init__(self, error_code, message):
|
|
17
|
+
def __init__(self, error_code, message, details=None):
|
|
18
18
|
"""Create a new DatabricksException"""
|
|
19
19
|
super().__init__(message)
|
|
20
20
|
|
|
21
21
|
self.error_code = error_code
|
|
22
22
|
self.message = message
|
|
23
|
+
self.details = details
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
class DatabricksFileSystem(AbstractFileSystem):
|
|
@@ -80,7 +81,7 @@ class DatabricksFileSystem(AbstractFileSystem):
|
|
|
80
81
|
raise FileNotFoundError(e.message) from e
|
|
81
82
|
|
|
82
83
|
raise
|
|
83
|
-
files = r["files"]
|
|
84
|
+
files = r.get("files", [])
|
|
84
85
|
out = [
|
|
85
86
|
{
|
|
86
87
|
"name": o["path"],
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import base64
|
|
2
|
+
import re
|
|
2
3
|
|
|
3
4
|
import requests
|
|
4
5
|
|
|
@@ -265,3 +266,70 @@ class GithubFileSystem(AbstractFileSystem):
|
|
|
265
266
|
cache_options=cache_options,
|
|
266
267
|
**kwargs,
|
|
267
268
|
)
|
|
269
|
+
|
|
270
|
+
def rm(self, path, recursive=False, maxdepth=None, message=None):
|
|
271
|
+
path = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
|
|
272
|
+
for p in reversed(path):
|
|
273
|
+
self.rm_file(p, message=message)
|
|
274
|
+
|
|
275
|
+
def rm_file(self, path, message=None, **kwargs):
|
|
276
|
+
"""
|
|
277
|
+
Remove a file from a specified branch using a given commit message.
|
|
278
|
+
|
|
279
|
+
Since Github DELETE operation requires a branch name, and we can't reliably
|
|
280
|
+
determine whether the provided SHA refers to a branch, tag, or commit, we
|
|
281
|
+
assume it's a branch. If it's not, the user will encounter an error when
|
|
282
|
+
attempting to retrieve the file SHA or delete the file.
|
|
283
|
+
|
|
284
|
+
Parameters
|
|
285
|
+
----------
|
|
286
|
+
path: str
|
|
287
|
+
The file's location relative to the repository root.
|
|
288
|
+
message: str, optional
|
|
289
|
+
The commit message for the deletion.
|
|
290
|
+
"""
|
|
291
|
+
|
|
292
|
+
if not self.username:
|
|
293
|
+
raise ValueError("Authentication required")
|
|
294
|
+
|
|
295
|
+
path = self._strip_protocol(path)
|
|
296
|
+
|
|
297
|
+
# Attempt to get SHA from cache or Github API
|
|
298
|
+
sha = self._get_sha_from_cache(path)
|
|
299
|
+
if not sha:
|
|
300
|
+
url = self.content_url.format(
|
|
301
|
+
org=self.org, repo=self.repo, path=path.lstrip("/"), sha=self.root
|
|
302
|
+
)
|
|
303
|
+
r = requests.get(url, timeout=self.timeout, **self.kw)
|
|
304
|
+
if r.status_code == 404:
|
|
305
|
+
raise FileNotFoundError(path)
|
|
306
|
+
r.raise_for_status()
|
|
307
|
+
sha = r.json()["sha"]
|
|
308
|
+
|
|
309
|
+
# Delete the file
|
|
310
|
+
delete_url = self.content_url.format(
|
|
311
|
+
org=self.org, repo=self.repo, path=path, sha=self.root
|
|
312
|
+
)
|
|
313
|
+
branch = self.root
|
|
314
|
+
data = {
|
|
315
|
+
"message": message or f"Delete {path}",
|
|
316
|
+
"sha": sha,
|
|
317
|
+
**({"branch": branch} if branch else {}),
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
r = requests.delete(delete_url, json=data, timeout=self.timeout, **self.kw)
|
|
321
|
+
error_message = r.json().get("message", "")
|
|
322
|
+
if re.search(r"Branch .+ not found", error_message):
|
|
323
|
+
error = "Remove only works when the filesystem is initialised from a branch or default (None)"
|
|
324
|
+
raise ValueError(error)
|
|
325
|
+
r.raise_for_status()
|
|
326
|
+
|
|
327
|
+
self.invalidate_cache(path)
|
|
328
|
+
|
|
329
|
+
def _get_sha_from_cache(self, path):
|
|
330
|
+
for entries in self.dircache.values():
|
|
331
|
+
for entry in entries:
|
|
332
|
+
entry_path = entry.get("name")
|
|
333
|
+
if entry_path and entry_path == path and "sha" in entry:
|
|
334
|
+
return entry["sha"]
|
|
335
|
+
return None
|
|
@@ -6,6 +6,7 @@ import os.path as osp
|
|
|
6
6
|
import shutil
|
|
7
7
|
import stat
|
|
8
8
|
import tempfile
|
|
9
|
+
from functools import lru_cache
|
|
9
10
|
|
|
10
11
|
from fsspec import AbstractFileSystem
|
|
11
12
|
from fsspec.compression import compr
|
|
@@ -154,7 +155,11 @@ class LocalFileSystem(AbstractFileSystem):
|
|
|
154
155
|
def put_file(self, path1, path2, callback=None, **kwargs):
|
|
155
156
|
return self.cp_file(path1, path2, **kwargs)
|
|
156
157
|
|
|
157
|
-
def mv(self, path1, path2, **kwargs):
|
|
158
|
+
def mv(self, path1, path2, recursive: bool = True, **kwargs):
|
|
159
|
+
"""Move files/directories
|
|
160
|
+
For the specific case of local, all ops on directories are recursive and
|
|
161
|
+
the recursive= kwarg is ignored.
|
|
162
|
+
"""
|
|
158
163
|
path1 = self._strip_protocol(path1)
|
|
159
164
|
path2 = self._strip_protocol(path2)
|
|
160
165
|
shutil.move(path1, path2)
|
|
@@ -350,6 +355,19 @@ def trailing_sep(path):
|
|
|
350
355
|
return path.endswith(os.sep) or (os.altsep is not None and path.endswith(os.altsep))
|
|
351
356
|
|
|
352
357
|
|
|
358
|
+
@lru_cache(maxsize=1)
|
|
359
|
+
def get_umask(mask: int = 0o666) -> int:
|
|
360
|
+
"""Get the current umask.
|
|
361
|
+
|
|
362
|
+
Follows https://stackoverflow.com/a/44130549 to get the umask.
|
|
363
|
+
Temporarily sets the umask to the given value, and then resets it to the
|
|
364
|
+
original value.
|
|
365
|
+
"""
|
|
366
|
+
value = os.umask(mask)
|
|
367
|
+
os.umask(value)
|
|
368
|
+
return value
|
|
369
|
+
|
|
370
|
+
|
|
353
371
|
class LocalFileOpener(io.IOBase):
|
|
354
372
|
def __init__(
|
|
355
373
|
self, path, mode, autocommit=True, fs=None, compression=None, **kwargs
|
|
@@ -412,7 +430,22 @@ class LocalFileOpener(io.IOBase):
|
|
|
412
430
|
def commit(self):
|
|
413
431
|
if self.autocommit:
|
|
414
432
|
raise RuntimeError("Can only commit if not already set to autocommit")
|
|
415
|
-
|
|
433
|
+
try:
|
|
434
|
+
shutil.move(self.temp, self.path)
|
|
435
|
+
except PermissionError as e:
|
|
436
|
+
# shutil.move raises PermissionError if os.rename
|
|
437
|
+
# and the default copy2 fallback with shutil.copystats fail.
|
|
438
|
+
# The file should be there nonetheless, but without copied permissions.
|
|
439
|
+
# If it doesn't exist, there was no permission to create the file.
|
|
440
|
+
if not os.path.exists(self.path):
|
|
441
|
+
raise e
|
|
442
|
+
else:
|
|
443
|
+
# If PermissionError is not raised, permissions can be set.
|
|
444
|
+
try:
|
|
445
|
+
mask = 0o666
|
|
446
|
+
os.chmod(self.path, mask & ~get_umask(mask))
|
|
447
|
+
except RuntimeError:
|
|
448
|
+
pass
|
|
416
449
|
|
|
417
450
|
def discard(self):
|
|
418
451
|
if self.autocommit:
|
|
@@ -72,13 +72,13 @@ known_implementations = {
|
|
|
72
72
|
"class": "fsspec.implementations.arrow.HadoopFileSystem",
|
|
73
73
|
"err": "pyarrow and local java libraries required for HDFS",
|
|
74
74
|
},
|
|
75
|
-
"async_wrapper": {
|
|
76
|
-
"class": "fsspec.asyn_wrapper.AsyncWrapperFileSystem",
|
|
77
|
-
},
|
|
78
75
|
"asynclocal": {
|
|
79
76
|
"class": "morefs.asyn_local.AsyncLocalFileSystem",
|
|
80
77
|
"err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
|
|
81
78
|
},
|
|
79
|
+
"asyncwrapper": {
|
|
80
|
+
"class": "fsspec.implementations.asyn_wrapper.AsyncFileSystemWrapper",
|
|
81
|
+
},
|
|
82
82
|
"az": {
|
|
83
83
|
"class": "adlfs.AzureBlobFileSystem",
|
|
84
84
|
"err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
|
|
@@ -180,6 +180,10 @@ known_implementations = {
|
|
|
180
180
|
"class": "ossfs.OSSFileSystem",
|
|
181
181
|
"err": "Install ossfs to access Alibaba Object Storage System",
|
|
182
182
|
},
|
|
183
|
+
"pyscript": {
|
|
184
|
+
"class": "pyscript_fsspec_client.client.PyscriptFileSystem",
|
|
185
|
+
"err": "Install requests (cpython) or run in pyscript",
|
|
186
|
+
},
|
|
183
187
|
"reference": {"class": "fsspec.implementations.reference.ReferenceFileSystem"},
|
|
184
188
|
"root": {
|
|
185
189
|
"class": "fsspec_xrootd.XRootDFileSystem",
|
|
@@ -548,17 +548,45 @@ class AbstractFileSystem(metaclass=_Cached):
|
|
|
548
548
|
return sizes
|
|
549
549
|
|
|
550
550
|
def glob(self, path, maxdepth=None, **kwargs):
|
|
551
|
-
"""
|
|
552
|
-
Find files by glob-matching.
|
|
551
|
+
"""Find files by glob-matching.
|
|
553
552
|
|
|
554
|
-
|
|
553
|
+
Pattern matching capabilities for finding files that match the given pattern.
|
|
555
554
|
|
|
556
|
-
|
|
557
|
-
|
|
555
|
+
Parameters
|
|
556
|
+
----------
|
|
557
|
+
path: str
|
|
558
|
+
The glob pattern to match against
|
|
559
|
+
maxdepth: int or None
|
|
560
|
+
Maximum depth for '**' patterns. Applied on the first '**' found.
|
|
561
|
+
Must be at least 1 if provided.
|
|
562
|
+
**kwargs:
|
|
563
|
+
Additional arguments passed to ``find`` (e.g., detail=True)
|
|
558
564
|
|
|
559
|
-
|
|
565
|
+
Returns
|
|
566
|
+
-------
|
|
567
|
+
List of matched paths, or dict of paths and their info if detail=True
|
|
560
568
|
|
|
561
|
-
|
|
569
|
+
Notes
|
|
570
|
+
-----
|
|
571
|
+
Supported patterns:
|
|
572
|
+
- '*': Matches any sequence of characters within a single directory level
|
|
573
|
+
- '**': Matches any number of directory levels (must be an entire path component)
|
|
574
|
+
- '?': Matches exactly one character
|
|
575
|
+
- '[abc]': Matches any character in the set
|
|
576
|
+
- '[a-z]': Matches any character in the range
|
|
577
|
+
- '[!abc]': Matches any character NOT in the set
|
|
578
|
+
|
|
579
|
+
Special behaviors:
|
|
580
|
+
- If the path ends with '/', only folders are returned
|
|
581
|
+
- Consecutive '*' characters are compressed into a single '*'
|
|
582
|
+
- Empty brackets '[]' never match anything
|
|
583
|
+
- Negated empty brackets '[!]' match any single character
|
|
584
|
+
- Special characters in character classes are escaped properly
|
|
585
|
+
|
|
586
|
+
Limitations:
|
|
587
|
+
- '**' must be a complete path component (e.g., 'a/**/b', not 'a**b')
|
|
588
|
+
- No brace expansion ('{a,b}.txt')
|
|
589
|
+
- No extended glob patterns ('+(pattern)', '!(pattern)')
|
|
562
590
|
"""
|
|
563
591
|
if maxdepth is not None and maxdepth < 1:
|
|
564
592
|
raise ValueError("maxdepth must be at least 1")
|
|
@@ -8,7 +8,7 @@ dynamic = ["version"]
|
|
|
8
8
|
description = "File-system specification"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { file = "LICENSE" }
|
|
11
|
-
requires-python = ">=3.8"
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
12
|
maintainers = [{ name = "Martin Durant", email = "mdurant@anaconda.com" }]
|
|
13
13
|
keywords = ["file"]
|
|
14
14
|
classifiers = [
|
|
@@ -16,7 +16,6 @@ classifiers = [
|
|
|
16
16
|
"Intended Audience :: Developers",
|
|
17
17
|
"License :: OSI Approved :: BSD License",
|
|
18
18
|
"Operating System :: OS Independent",
|
|
19
|
-
"Programming Language :: Python :: 3.8",
|
|
20
19
|
"Programming Language :: Python :: 3.9",
|
|
21
20
|
"Programming Language :: Python :: 3.10",
|
|
22
21
|
"Programming Language :: Python :: 3.11",
|
|
@@ -184,7 +183,7 @@ select = [
|
|
|
184
183
|
"SIM",
|
|
185
184
|
"SLOT",
|
|
186
185
|
"SIM101",
|
|
187
|
-
"
|
|
186
|
+
"TC",
|
|
188
187
|
"UP",
|
|
189
188
|
]
|
|
190
189
|
ignore = [
|
fsspec-2025.3.1/Untitled.ipynb
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
hello
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|