fsspec 2025.3.2__tar.gz → 2025.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. {fsspec-2025.3.2 → fsspec-2025.5.0}/PKG-INFO +1 -1
  2. {fsspec-2025.3.2 → fsspec-2025.5.0}/docs/source/api.rst +3 -1
  3. {fsspec-2025.3.2 → fsspec-2025.5.0}/docs/source/changelog.rst +25 -0
  4. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/__init__.py +4 -2
  5. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/_version.py +2 -2
  6. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/generic.py +45 -61
  7. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/asyn_wrapper.py +15 -4
  8. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/cached.py +12 -0
  9. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/dbfs.py +3 -2
  10. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/github.py +68 -0
  11. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/local.py +35 -2
  12. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/registry.py +7 -3
  13. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/spec.py +35 -7
  14. {fsspec-2025.3.2 → fsspec-2025.5.0}/pyproject.toml +1 -1
  15. {fsspec-2025.3.2 → fsspec-2025.5.0}/.codespellrc +0 -0
  16. {fsspec-2025.3.2 → fsspec-2025.5.0}/.coveragerc +0 -0
  17. {fsspec-2025.3.2 → fsspec-2025.5.0}/.gitattributes +0 -0
  18. {fsspec-2025.3.2 → fsspec-2025.5.0}/.github/workflows/main.yaml +0 -0
  19. {fsspec-2025.3.2 → fsspec-2025.5.0}/.github/workflows/pypipublish.yaml +0 -0
  20. {fsspec-2025.3.2 → fsspec-2025.5.0}/.gitignore +0 -0
  21. {fsspec-2025.3.2 → fsspec-2025.5.0}/.pre-commit-config.yaml +0 -0
  22. {fsspec-2025.3.2 → fsspec-2025.5.0}/LICENSE +0 -0
  23. {fsspec-2025.3.2 → fsspec-2025.5.0}/README.md +0 -0
  24. {fsspec-2025.3.2 → fsspec-2025.5.0}/ci/environment-downstream.yml +0 -0
  25. {fsspec-2025.3.2 → fsspec-2025.5.0}/ci/environment-friends.yml +0 -0
  26. {fsspec-2025.3.2 → fsspec-2025.5.0}/ci/environment-py38.yml +0 -0
  27. {fsspec-2025.3.2 → fsspec-2025.5.0}/ci/environment-typecheck.yml +0 -0
  28. {fsspec-2025.3.2 → fsspec-2025.5.0}/ci/environment-win.yml +0 -0
  29. {fsspec-2025.3.2 → fsspec-2025.5.0}/docs/Makefile +0 -0
  30. {fsspec-2025.3.2 → fsspec-2025.5.0}/docs/README.md +0 -0
  31. {fsspec-2025.3.2 → fsspec-2025.5.0}/docs/environment.yml +0 -0
  32. {fsspec-2025.3.2 → fsspec-2025.5.0}/docs/make.bat +0 -0
  33. {fsspec-2025.3.2 → fsspec-2025.5.0}/docs/source/_static/custom.css +0 -0
  34. {fsspec-2025.3.2 → fsspec-2025.5.0}/docs/source/async.rst +0 -0
  35. {fsspec-2025.3.2 → fsspec-2025.5.0}/docs/source/conf.py +0 -0
  36. {fsspec-2025.3.2 → fsspec-2025.5.0}/docs/source/copying.rst +0 -0
  37. {fsspec-2025.3.2 → fsspec-2025.5.0}/docs/source/developer.rst +0 -0
  38. {fsspec-2025.3.2 → fsspec-2025.5.0}/docs/source/features.rst +0 -0
  39. {fsspec-2025.3.2 → fsspec-2025.5.0}/docs/source/img/gui.png +0 -0
  40. {fsspec-2025.3.2 → fsspec-2025.5.0}/docs/source/index.rst +0 -0
  41. {fsspec-2025.3.2 → fsspec-2025.5.0}/docs/source/intro.rst +0 -0
  42. {fsspec-2025.3.2 → fsspec-2025.5.0}/docs/source/usage.rst +0 -0
  43. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/archive.py +0 -0
  44. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/asyn.py +0 -0
  45. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/caching.py +0 -0
  46. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/callbacks.py +0 -0
  47. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/compression.py +0 -0
  48. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/config.py +0 -0
  49. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/conftest.py +0 -0
  50. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/core.py +0 -0
  51. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/dircache.py +0 -0
  52. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/exceptions.py +0 -0
  53. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/fuse.py +0 -0
  54. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/gui.py +0 -0
  55. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/__init__.py +0 -0
  56. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/arrow.py +0 -0
  57. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/cache_mapper.py +0 -0
  58. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/cache_metadata.py +0 -0
  59. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/dask.py +0 -0
  60. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/data.py +0 -0
  61. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/dirfs.py +0 -0
  62. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/ftp.py +0 -0
  63. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/git.py +0 -0
  64. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/http.py +0 -0
  65. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/http_sync.py +0 -0
  66. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/jupyter.py +0 -0
  67. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/libarchive.py +0 -0
  68. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/memory.py +0 -0
  69. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/reference.py +0 -0
  70. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/sftp.py +0 -0
  71. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/smb.py +0 -0
  72. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/tar.py +0 -0
  73. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/webhdfs.py +0 -0
  74. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/implementations/zip.py +0 -0
  75. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/json.py +0 -0
  76. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/mapping.py +0 -0
  77. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/parquet.py +0 -0
  78. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/tests/abstract/__init__.py +0 -0
  79. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/tests/abstract/common.py +0 -0
  80. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/tests/abstract/copy.py +0 -0
  81. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/tests/abstract/get.py +0 -0
  82. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/tests/abstract/mv.py +0 -0
  83. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/tests/abstract/open.py +0 -0
  84. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/tests/abstract/pipe.py +0 -0
  85. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/tests/abstract/put.py +0 -0
  86. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/transaction.py +0 -0
  87. {fsspec-2025.3.2 → fsspec-2025.5.0}/fsspec/utils.py +0 -0
  88. {fsspec-2025.3.2 → fsspec-2025.5.0}/install_s3fs.sh +0 -0
  89. {fsspec-2025.3.2 → fsspec-2025.5.0}/readthedocs.yml +0 -0
  90. {fsspec-2025.3.2 → fsspec-2025.5.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fsspec
3
- Version: 2025.3.2
3
+ Version: 2025.5.0
4
4
  Summary: File-system specification
5
5
  Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
6
6
  Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
@@ -221,7 +221,7 @@ documentation carefully before using any particular package.
221
221
  - `dropbox`_ for access to dropbox shares, with protocol "dropbox://"
222
222
  - `dvc`_ to access DVC/Git repository as a filesystem
223
223
  - `fsspec-encrypted`_ for transparent encryption on top of other fsspec filesystems.
224
- - `gcsfs`_ for Google Cloud Storage, with protocol "gcs://"
224
+ - `gcsfs`_ for Google Cloud Storage, with protocol "gs://" or "gcs://"
225
225
  - `gdrive`_ to access Google Drive and shares (experimental)
226
226
  - `git`_ to access Git repositories
227
227
  - `huggingface_hub`_ to access the Hugging Face Hub filesystem, with protocol "hf://"
@@ -237,6 +237,7 @@ documentation carefully before using any particular package.
237
237
  - `p9fs`_ for 9P (Plan 9 Filesystem Protocol) servers
238
238
  - `PyAthena`_ for S3 access to Amazon Athena, with protocol "s3://" or "s3a://"
239
239
  - `PyDrive2`_ for Google Drive access
240
+ - `fsspec-proxy`_ for "pyscript:" URLs via a proxy server
240
241
  - `s3fs`_ for Amazon S3 and other compatible stores, with protocol "s3://"
241
242
  - `sshfs`_ for access to SSH servers, with protocol "ssh://" or "sftp://"
242
243
  - `swiftspec`_ for OpenStack SWIFT, with protocol "swift://"
@@ -254,6 +255,7 @@ documentation carefully before using any particular package.
254
255
  .. _dropbox: https://github.com/fsspec/dropboxdrivefs
255
256
  .. _dvc: https://github.com/iterative/dvc
256
257
  .. _fsspec-encrypted: https://github.com/thevgergroup/fsspec-encrypted
258
+ .. _fsspec-proxy: https://github.com/fsspec/fsspec-proxy
257
259
  .. _gcsfs: https://gcsfs.readthedocs.io/en/latest/
258
260
  .. _gdrive: https://github.com/fsspec/gdrivefs
259
261
  .. _git: https://github.com/iterative/scmrepo
@@ -1,6 +1,31 @@
1
1
  Changelog
2
2
  =========
3
3
 
4
+ 2025.5.0
5
+ --------
6
+
7
+ Enhancements
8
+
9
+ - add rm in github: (#1839)
10
+ - CachingFS performance, don't ls every time (#1833)
11
+ - pyscript: protocol and optional entrypoint (#1828)
12
+
13
+ Fixes
14
+
15
+ - improve cp in generic (#1835)
16
+ - fix ls in dbfs (#1834)
17
+ - fix parquet tests for pyarrow 20 (#1831)
18
+ - fix crosslink for local tempfile (#1829)
19
+ - keep permissions in local transaction (#1826)
20
+ - signature compatibility in local cp() (#1820)
21
+
22
+ Other
23
+
24
+ - add "gs" at mention of gcsfs in docs (#1840)
25
+ - lint style (#1837)
26
+ - glob docstring to reflect reality (#1825)
27
+
28
+
4
29
  2025.3.2
5
30
  --------
6
31
 
@@ -1,5 +1,3 @@
1
- from importlib.metadata import entry_points
2
-
3
1
  from . import caching
4
2
  from ._version import __version__ # noqa: F401
5
3
  from .callbacks import Callback
@@ -38,6 +36,10 @@ __all__ = [
38
36
 
39
37
 
40
38
  def process_entries():
39
+ try:
40
+ from importlib.metadata import entry_points
41
+ except ImportError:
42
+ return
41
43
  if entry_points is not None:
42
44
  try:
43
45
  eps = entry_points()
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '2025.3.2'
21
- __version_tuple__ = version_tuple = (2025, 3, 2)
20
+ __version__ = version = '2025.5.0'
21
+ __version_tuple__ = version_tuple = (2025, 5, 0)
@@ -16,15 +16,13 @@ logger = logging.getLogger("fsspec.generic")
16
16
 
17
17
 
18
18
  def set_generic_fs(protocol, **storage_options):
19
+ """Populate the dict used for method=="generic" lookups"""
19
20
  _generic_fs[protocol] = filesystem(protocol, **storage_options)
20
21
 
21
22
 
22
- default_method = "default"
23
-
24
-
25
- def _resolve_fs(url, method=None, protocol=None, storage_options=None):
23
+ def _resolve_fs(url, method, protocol=None, storage_options=None):
26
24
  """Pick instance of backend FS"""
27
- method = method or default_method
25
+ url = url[0] if isinstance(url, (list, tuple)) else url
28
26
  protocol = protocol or split_protocol(url)[0]
29
27
  storage_options = storage_options or {}
30
28
  if method == "default":
@@ -159,7 +157,7 @@ class GenericFileSystem(AsyncFileSystem):
159
157
 
160
158
  protocol = "generic" # there is no real reason to ever use a protocol with this FS
161
159
 
162
- def __init__(self, default_method="default", **kwargs):
160
+ def __init__(self, default_method="default", storage_options=None, **kwargs):
163
161
  """
164
162
 
165
163
  Parameters
@@ -171,22 +169,25 @@ class GenericFileSystem(AsyncFileSystem):
171
169
  configured via the config system
172
170
  - "generic": takes instances from the `_generic_fs` dict in this module,
173
171
  which you must populate before use. Keys are by protocol
172
+ - "options": expects storage_options, a dict mapping protocol to
173
+ kwargs to use when constructing the filesystem
174
174
  - "current": takes the most recently instantiated version of each FS
175
175
  """
176
176
  self.method = default_method
177
+ self.st_opts = storage_options
177
178
  super().__init__(**kwargs)
178
179
 
179
180
  def _parent(self, path):
180
- fs = _resolve_fs(path, self.method)
181
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
181
182
  return fs.unstrip_protocol(fs._parent(path))
182
183
 
183
184
  def _strip_protocol(self, path):
184
185
  # normalization only
185
- fs = _resolve_fs(path, self.method)
186
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
186
187
  return fs.unstrip_protocol(fs._strip_protocol(path))
187
188
 
188
189
  async def _find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
189
- fs = _resolve_fs(path, self.method)
190
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
190
191
  if fs.async_impl:
191
192
  out = await fs._find(
192
193
  path, maxdepth=maxdepth, withdirs=withdirs, detail=True, **kwargs
@@ -251,7 +252,7 @@ class GenericFileSystem(AsyncFileSystem):
251
252
  value,
252
253
  **kwargs,
253
254
  ):
254
- fs = _resolve_fs(path, self.method)
255
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
255
256
  if fs.async_impl:
256
257
  return await fs._pipe_file(path, value, **kwargs)
257
258
  else:
@@ -269,7 +270,7 @@ class GenericFileSystem(AsyncFileSystem):
269
270
 
270
271
  async def _makedirs(self, path, exist_ok=False):
271
272
  logger.debug("Make dir %s", path)
272
- fs = _resolve_fs(path, self.method)
273
+ fs = _resolve_fs(path, self.method, storage_options=self.st_opts)
273
274
  if fs.async_impl:
274
275
  await fs._makedirs(path, exist_ok=exist_ok)
275
276
  else:
@@ -288,6 +289,7 @@ class GenericFileSystem(AsyncFileSystem):
288
289
  url2,
289
290
  blocksize=2**20,
290
291
  callback=DEFAULT_CALLBACK,
292
+ tempdir: Optional[str] = None,
291
293
  **kwargs,
292
294
  ):
293
295
  fs = _resolve_fs(url, self.method)
@@ -295,35 +297,10 @@ class GenericFileSystem(AsyncFileSystem):
295
297
  if fs is fs2:
296
298
  # pure remote
297
299
  if fs.async_impl:
298
- return await fs._cp_file(url, url2, **kwargs)
300
+ return await fs._copy(url, url2, **kwargs)
299
301
  else:
300
- return fs.cp_file(url, url2, **kwargs)
301
- kw = {"blocksize": 0, "cache_type": "none"}
302
- try:
303
- f1 = (
304
- await fs.open_async(url, "rb")
305
- if hasattr(fs, "open_async")
306
- else fs.open(url, "rb", **kw)
307
- )
308
- callback.set_size(await maybe_await(f1.size))
309
- f2 = (
310
- await fs2.open_async(url2, "wb")
311
- if hasattr(fs2, "open_async")
312
- else fs2.open(url2, "wb", **kw)
313
- )
314
- while f1.size is None or f2.tell() < f1.size:
315
- data = await maybe_await(f1.read(blocksize))
316
- if f1.size is None and not data:
317
- break
318
- await maybe_await(f2.write(data))
319
- callback.absolute_update(f2.tell())
320
- finally:
321
- try:
322
- await maybe_await(f2.close())
323
- await maybe_await(f1.close())
324
- except NameError:
325
- # fail while opening f1 or f2
326
- pass
302
+ return fs.copy(url, url2, **kwargs)
303
+ await copy_file_op(fs, [url], fs2, [url2], tempdir, 1, on_error="raise")
327
304
 
328
305
  async def _make_many_dirs(self, urls, exist_ok=True):
329
306
  fs = _resolve_fs(urls[0], self.method)
@@ -347,17 +324,22 @@ class GenericFileSystem(AsyncFileSystem):
347
324
  tempdir: Optional[str] = None,
348
325
  **kwargs,
349
326
  ):
327
+ # TODO: special case for one FS being local, which can use get/put
328
+ # TODO: special case for one being memFS, which can use cat/pipe
350
329
  if recursive:
351
- raise NotImplementedError
352
- fs = _resolve_fs(path1[0], self.method)
353
- fs2 = _resolve_fs(path2[0], self.method)
354
- # not expanding paths atm., assume call is from rsync()
330
+ raise NotImplementedError("Please use fsspec.generic.rsync")
331
+ path1 = [path1] if isinstance(path1, str) else path1
332
+ path2 = [path2] if isinstance(path2, str) else path2
333
+
334
+ fs = _resolve_fs(path1, self.method)
335
+ fs2 = _resolve_fs(path2, self.method)
336
+
355
337
  if fs is fs2:
356
- # pure remote
357
338
  if fs.async_impl:
358
339
  return await fs._copy(path1, path2, **kwargs)
359
340
  else:
360
341
  return fs.copy(path1, path2, **kwargs)
342
+
361
343
  await copy_file_op(
362
344
  fs, path1, fs2, path2, tempdir, batch_size, on_error=on_error
363
345
  )
@@ -377,31 +359,33 @@ async def copy_file_op(
377
359
  fs2,
378
360
  u2,
379
361
  os.path.join(tempdir, uuid.uuid4().hex),
380
- on_error=on_error,
381
362
  )
382
363
  for u1, u2 in zip(url1, url2)
383
364
  ]
384
- await _run_coros_in_chunks(coros, batch_size=batch_size)
365
+ out = await _run_coros_in_chunks(
366
+ coros, batch_size=batch_size, return_exceptions=True
367
+ )
385
368
  finally:
386
369
  shutil.rmtree(tempdir)
370
+ if on_error == "return":
371
+ return out
372
+ elif on_error == "raise":
373
+ for o in out:
374
+ if isinstance(o, Exception):
375
+ raise o
387
376
 
388
377
 
389
378
  async def _copy_file_op(fs1, url1, fs2, url2, local, on_error="ignore"):
390
- ex = () if on_error == "raise" else Exception
391
- logger.debug("Copy %s -> %s", url1, url2)
392
- try:
393
- if fs1.async_impl:
394
- await fs1._get_file(url1, local)
395
- else:
396
- fs1.get_file(url1, local)
397
- if fs2.async_impl:
398
- await fs2._put_file(local, url2)
399
- else:
400
- fs2.put_file(local, url2)
401
- os.unlink(local)
402
- logger.debug("Copy %s -> %s; done", url1, url2)
403
- except ex as e:
404
- logger.debug("ignoring cp exception for %s: %s", url1, e)
379
+ if fs1.async_impl:
380
+ await fs1._get_file(url1, local)
381
+ else:
382
+ fs1.get_file(url1, local)
383
+ if fs2.async_impl:
384
+ await fs2._put_file(local, url2)
385
+ else:
386
+ fs2.put_file(local, url2)
387
+ os.unlink(local)
388
+ logger.debug("Copy %s -> %s; done", url1, url2)
405
389
 
406
390
 
407
391
  async def maybe_await(cor):
@@ -2,6 +2,7 @@ import asyncio
2
2
  import functools
3
3
  import inspect
4
4
 
5
+ import fsspec
5
6
  from fsspec.asyn import AsyncFileSystem, running_async
6
7
 
7
8
 
@@ -42,14 +43,24 @@ class AsyncFileSystemWrapper(AsyncFileSystem):
42
43
  The synchronous filesystem instance to wrap.
43
44
  """
44
45
 
45
- protocol = "async_wrapper"
46
+ protocol = "asyncwrapper", "async_wrapper"
46
47
  cachable = False
47
48
 
48
- def __init__(self, fs, *args, asynchronous=None, **kwargs):
49
+ def __init__(
50
+ self,
51
+ fs=None,
52
+ asynchronous=None,
53
+ target_protocol=None,
54
+ target_options=None,
55
+ **kwargs,
56
+ ):
49
57
  if asynchronous is None:
50
58
  asynchronous = running_async()
51
- super().__init__(*args, asynchronous=asynchronous, **kwargs)
52
- self.sync_fs = fs
59
+ super().__init__(asynchronous=asynchronous, **kwargs)
60
+ if fs is not None:
61
+ self.sync_fs = fs
62
+ else:
63
+ self.sync_fs = fsspec.filesystem(target_protocol, **target_options)
53
64
  self.protocol = self.sync_fs.protocol
54
65
  self._wrap_all_sync_methods()
55
66
 
@@ -335,6 +335,11 @@ class CachingFileSystem(AbstractFileSystem):
335
335
  self._metadata.update_file(path, detail)
336
336
  logger.debug("Creating local sparse file for %s", path)
337
337
 
338
+ # explicitly submitting the size to the open call will avoid extra
339
+ # operations when opening. This is particularly relevant
340
+ # for any file that is read over a network, e.g. S3.
341
+ size = detail.get("size", None)
342
+
338
343
  # call target filesystems open
339
344
  self._mkcache()
340
345
  f = self.fs._open(
@@ -344,8 +349,15 @@ class CachingFileSystem(AbstractFileSystem):
344
349
  autocommit=autocommit,
345
350
  cache_options=cache_options,
346
351
  cache_type="none",
352
+ size=size,
347
353
  **kwargs,
348
354
  )
355
+
356
+ # set size if not already set
357
+ if size is None:
358
+ detail["size"] = f.size
359
+ self._metadata.update_file(path, detail)
360
+
349
361
  if self.compression:
350
362
  comp = (
351
363
  infer_compression(path)
@@ -14,12 +14,13 @@ class DatabricksException(Exception):
14
14
  Helper class for exceptions raised in this module.
15
15
  """
16
16
 
17
- def __init__(self, error_code, message):
17
+ def __init__(self, error_code, message, details=None):
18
18
  """Create a new DatabricksException"""
19
19
  super().__init__(message)
20
20
 
21
21
  self.error_code = error_code
22
22
  self.message = message
23
+ self.details = details
23
24
 
24
25
 
25
26
  class DatabricksFileSystem(AbstractFileSystem):
@@ -80,7 +81,7 @@ class DatabricksFileSystem(AbstractFileSystem):
80
81
  raise FileNotFoundError(e.message) from e
81
82
 
82
83
  raise
83
- files = r["files"]
84
+ files = r.get("files", [])
84
85
  out = [
85
86
  {
86
87
  "name": o["path"],
@@ -1,4 +1,5 @@
1
1
  import base64
2
+ import re
2
3
 
3
4
  import requests
4
5
 
@@ -265,3 +266,70 @@ class GithubFileSystem(AbstractFileSystem):
265
266
  cache_options=cache_options,
266
267
  **kwargs,
267
268
  )
269
+
270
+ def rm(self, path, recursive=False, maxdepth=None, message=None):
271
+ path = self.expand_path(path, recursive=recursive, maxdepth=maxdepth)
272
+ for p in reversed(path):
273
+ self.rm_file(p, message=message)
274
+
275
+ def rm_file(self, path, message=None, **kwargs):
276
+ """
277
+ Remove a file from a specified branch using a given commit message.
278
+
279
+ Since Github DELETE operation requires a branch name, and we can't reliably
280
+ determine whether the provided SHA refers to a branch, tag, or commit, we
281
+ assume it's a branch. If it's not, the user will encounter an error when
282
+ attempting to retrieve the file SHA or delete the file.
283
+
284
+ Parameters
285
+ ----------
286
+ path: str
287
+ The file's location relative to the repository root.
288
+ message: str, optional
289
+ The commit message for the deletion.
290
+ """
291
+
292
+ if not self.username:
293
+ raise ValueError("Authentication required")
294
+
295
+ path = self._strip_protocol(path)
296
+
297
+ # Attempt to get SHA from cache or Github API
298
+ sha = self._get_sha_from_cache(path)
299
+ if not sha:
300
+ url = self.content_url.format(
301
+ org=self.org, repo=self.repo, path=path.lstrip("/"), sha=self.root
302
+ )
303
+ r = requests.get(url, timeout=self.timeout, **self.kw)
304
+ if r.status_code == 404:
305
+ raise FileNotFoundError(path)
306
+ r.raise_for_status()
307
+ sha = r.json()["sha"]
308
+
309
+ # Delete the file
310
+ delete_url = self.content_url.format(
311
+ org=self.org, repo=self.repo, path=path, sha=self.root
312
+ )
313
+ branch = self.root
314
+ data = {
315
+ "message": message or f"Delete {path}",
316
+ "sha": sha,
317
+ **({"branch": branch} if branch else {}),
318
+ }
319
+
320
+ r = requests.delete(delete_url, json=data, timeout=self.timeout, **self.kw)
321
+ error_message = r.json().get("message", "")
322
+ if re.search(r"Branch .+ not found", error_message):
323
+ error = "Remove only works when the filesystem is initialised from a branch or default (None)"
324
+ raise ValueError(error)
325
+ r.raise_for_status()
326
+
327
+ self.invalidate_cache(path)
328
+
329
+ def _get_sha_from_cache(self, path):
330
+ for entries in self.dircache.values():
331
+ for entry in entries:
332
+ entry_path = entry.get("name")
333
+ if entry_path and entry_path == path and "sha" in entry:
334
+ return entry["sha"]
335
+ return None
@@ -6,6 +6,7 @@ import os.path as osp
6
6
  import shutil
7
7
  import stat
8
8
  import tempfile
9
+ from functools import lru_cache
9
10
 
10
11
  from fsspec import AbstractFileSystem
11
12
  from fsspec.compression import compr
@@ -154,7 +155,11 @@ class LocalFileSystem(AbstractFileSystem):
154
155
  def put_file(self, path1, path2, callback=None, **kwargs):
155
156
  return self.cp_file(path1, path2, **kwargs)
156
157
 
157
- def mv(self, path1, path2, **kwargs):
158
+ def mv(self, path1, path2, recursive: bool = True, **kwargs):
159
+ """Move files/directories
160
+ For the specific case of local, all ops on directories are recursive and
161
+ the recursive= kwarg is ignored.
162
+ """
158
163
  path1 = self._strip_protocol(path1)
159
164
  path2 = self._strip_protocol(path2)
160
165
  shutil.move(path1, path2)
@@ -350,6 +355,19 @@ def trailing_sep(path):
350
355
  return path.endswith(os.sep) or (os.altsep is not None and path.endswith(os.altsep))
351
356
 
352
357
 
358
+ @lru_cache(maxsize=1)
359
+ def get_umask(mask: int = 0o666) -> int:
360
+ """Get the current umask.
361
+
362
+ Follows https://stackoverflow.com/a/44130549 to get the umask.
363
+ Temporarily sets the umask to the given value, and then resets it to the
364
+ original value.
365
+ """
366
+ value = os.umask(mask)
367
+ os.umask(value)
368
+ return value
369
+
370
+
353
371
  class LocalFileOpener(io.IOBase):
354
372
  def __init__(
355
373
  self, path, mode, autocommit=True, fs=None, compression=None, **kwargs
@@ -412,7 +430,22 @@ class LocalFileOpener(io.IOBase):
412
430
  def commit(self):
413
431
  if self.autocommit:
414
432
  raise RuntimeError("Can only commit if not already set to autocommit")
415
- shutil.move(self.temp, self.path)
433
+ try:
434
+ shutil.move(self.temp, self.path)
435
+ except PermissionError as e:
436
+ # shutil.move raises PermissionError if os.rename
437
+ # and the default copy2 fallback with shutil.copystats fail.
438
+ # The file should be there nonetheless, but without copied permissions.
439
+ # If it doesn't exist, there was no permission to create the file.
440
+ if not os.path.exists(self.path):
441
+ raise e
442
+ else:
443
+ # If PermissionError is not raised, permissions can be set.
444
+ try:
445
+ mask = 0o666
446
+ os.chmod(self.path, mask & ~get_umask(mask))
447
+ except RuntimeError:
448
+ pass
416
449
 
417
450
  def discard(self):
418
451
  if self.autocommit:
@@ -72,13 +72,13 @@ known_implementations = {
72
72
  "class": "fsspec.implementations.arrow.HadoopFileSystem",
73
73
  "err": "pyarrow and local java libraries required for HDFS",
74
74
  },
75
- "async_wrapper": {
76
- "class": "fsspec.asyn_wrapper.AsyncWrapperFileSystem",
77
- },
78
75
  "asynclocal": {
79
76
  "class": "morefs.asyn_local.AsyncLocalFileSystem",
80
77
  "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
81
78
  },
79
+ "asyncwrapper": {
80
+ "class": "fsspec.implementations.asyn_wrapper.AsyncFileSystemWrapper",
81
+ },
82
82
  "az": {
83
83
  "class": "adlfs.AzureBlobFileSystem",
84
84
  "err": "Install adlfs to access Azure Datalake Gen2 and Azure Blob Storage",
@@ -180,6 +180,10 @@ known_implementations = {
180
180
  "class": "ossfs.OSSFileSystem",
181
181
  "err": "Install ossfs to access Alibaba Object Storage System",
182
182
  },
183
+ "pyscript": {
184
+ "class": "pyscript_fsspec_client.client.PyscriptFileSystem",
185
+ "err": "Install requests (cpython) or run in pyscript",
186
+ },
183
187
  "reference": {"class": "fsspec.implementations.reference.ReferenceFileSystem"},
184
188
  "root": {
185
189
  "class": "fsspec_xrootd.XRootDFileSystem",
@@ -548,17 +548,45 @@ class AbstractFileSystem(metaclass=_Cached):
548
548
  return sizes
549
549
 
550
550
  def glob(self, path, maxdepth=None, **kwargs):
551
- """
552
- Find files by glob-matching.
551
+ """Find files by glob-matching.
553
552
 
554
- If the path ends with '/', only folders are returned.
553
+ Pattern matching capabilities for finding files that match the given pattern.
555
554
 
556
- We support ``"**"``,
557
- ``"?"`` and ``"[..]"``. We do not support ^ for pattern negation.
555
+ Parameters
556
+ ----------
557
+ path: str
558
+ The glob pattern to match against
559
+ maxdepth: int or None
560
+ Maximum depth for '**' patterns. Applied on the first '**' found.
561
+ Must be at least 1 if provided.
562
+ **kwargs:
563
+ Additional arguments passed to ``find`` (e.g., detail=True)
558
564
 
559
- The `maxdepth` option is applied on the first `**` found in the path.
565
+ Returns
566
+ -------
567
+ List of matched paths, or dict of paths and their info if detail=True
560
568
 
561
- kwargs are passed to ``ls``.
569
+ Notes
570
+ -----
571
+ Supported patterns:
572
+ - '*': Matches any sequence of characters within a single directory level
573
+ - '**': Matches any number of directory levels (must be an entire path component)
574
+ - '?': Matches exactly one character
575
+ - '[abc]': Matches any character in the set
576
+ - '[a-z]': Matches any character in the range
577
+ - '[!abc]': Matches any character NOT in the set
578
+
579
+ Special behaviors:
580
+ - If the path ends with '/', only folders are returned
581
+ - Consecutive '*' characters are compressed into a single '*'
582
+ - Empty brackets '[]' never match anything
583
+ - Negated empty brackets '[!]' match any single character
584
+ - Special characters in character classes are escaped properly
585
+
586
+ Limitations:
587
+ - '**' must be a complete path component (e.g., 'a/**/b', not 'a**b')
588
+ - No brace expansion ('{a,b}.txt')
589
+ - No extended glob patterns ('+(pattern)', '!(pattern)')
562
590
  """
563
591
  if maxdepth is not None and maxdepth < 1:
564
592
  raise ValueError("maxdepth must be at least 1")
@@ -183,7 +183,7 @@ select = [
183
183
  "SIM",
184
184
  "SLOT",
185
185
  "SIM101",
186
- "TCH",
186
+ "TC",
187
187
  "UP",
188
188
  ]
189
189
  ignore = [
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes