fsspec 2023.10.0__py3-none-any.whl → 2024.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. fsspec/_version.py +3 -3
  2. fsspec/archive.py +4 -4
  3. fsspec/asyn.py +43 -53
  4. fsspec/caching.py +1 -1
  5. fsspec/callbacks.py +98 -12
  6. fsspec/compression.py +3 -3
  7. fsspec/core.py +16 -3
  8. fsspec/exceptions.py +0 -4
  9. fsspec/generic.py +11 -4
  10. fsspec/gui.py +4 -3
  11. fsspec/implementations/arrow.py +9 -0
  12. fsspec/implementations/cache_mapper.py +2 -6
  13. fsspec/implementations/cached.py +92 -18
  14. fsspec/implementations/data.py +48 -0
  15. fsspec/implementations/dbfs.py +14 -4
  16. fsspec/implementations/dirfs.py +6 -0
  17. fsspec/implementations/ftp.py +18 -13
  18. fsspec/implementations/github.py +17 -5
  19. fsspec/implementations/http.py +42 -51
  20. fsspec/implementations/libarchive.py +2 -3
  21. fsspec/implementations/local.py +11 -4
  22. fsspec/implementations/memory.py +2 -2
  23. fsspec/implementations/reference.py +127 -56
  24. fsspec/implementations/sftp.py +6 -5
  25. fsspec/implementations/smb.py +0 -1
  26. fsspec/implementations/tar.py +2 -1
  27. fsspec/implementations/webhdfs.py +46 -5
  28. fsspec/implementations/zip.py +11 -3
  29. fsspec/parquet.py +3 -5
  30. fsspec/registry.py +2 -1
  31. fsspec/spec.py +51 -61
  32. fsspec/tests/abstract/common.py +5 -5
  33. fsspec/tests/abstract/copy.py +21 -7
  34. fsspec/tests/abstract/put.py +21 -7
  35. fsspec/transaction.py +8 -4
  36. fsspec/utils.py +114 -1
  37. {fsspec-2023.10.0.dist-info → fsspec-2024.2.0.dist-info}/METADATA +1 -2
  38. fsspec-2024.2.0.dist-info/RECORD +54 -0
  39. {fsspec-2023.10.0.dist-info → fsspec-2024.2.0.dist-info}/WHEEL +1 -1
  40. fsspec-2023.10.0.dist-info/RECORD +0 -53
  41. {fsspec-2023.10.0.dist-info → fsspec-2024.2.0.dist-info}/LICENSE +0 -0
  42. {fsspec-2023.10.0.dist-info → fsspec-2024.2.0.dist-info}/top_level.txt +0 -0
fsspec/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2023-10-21T13:35:51-0400",
11
+ "date": "2024-02-04T20:21:42-0500",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "e20f626b87b5bb87d223495a56aefd768272a7ca",
15
- "version": "2023.10.0"
14
+ "full-revisionid": "5dc364e13b63609717d77b7361e80cfa64e3b8fd",
15
+ "version": "2024.2.0"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
fsspec/archive.py CHANGED
@@ -38,7 +38,7 @@ class AbstractArchiveFileSystem(AbstractFileSystem):
38
38
  self._get_dirs()
39
39
  path = self._strip_protocol(path)
40
40
  if path in {"", "/"} and self.dir_cache:
41
- return {"name": "/", "type": "directory", "size": 0}
41
+ return {"name": "", "type": "directory", "size": 0}
42
42
  if path in self.dir_cache:
43
43
  return self.dir_cache[path]
44
44
  elif path + "/" in self.dir_cache:
@@ -64,10 +64,10 @@ class AbstractArchiveFileSystem(AbstractFileSystem):
64
64
  # root directory entry
65
65
  ppath = p.rstrip("/").split("/", 1)[0]
66
66
  if ppath not in paths:
67
- out = {"name": ppath + "/", "size": 0, "type": "directory"}
67
+ out = {"name": ppath, "size": 0, "type": "directory"}
68
68
  paths[ppath] = out
69
- out = sorted(paths.values(), key=lambda _: _["name"])
70
69
  if detail:
70
+ out = sorted(paths.values(), key=lambda _: _["name"])
71
71
  return out
72
72
  else:
73
- return [f["name"] for f in out]
73
+ return sorted(paths)
fsspec/asyn.py CHANGED
@@ -11,11 +11,11 @@ from contextlib import contextmanager
11
11
  from glob import has_magic
12
12
  from typing import TYPE_CHECKING, Iterable
13
13
 
14
- from .callbacks import _DEFAULT_CALLBACK
14
+ from .callbacks import DEFAULT_CALLBACK
15
15
  from .exceptions import FSTimeoutError
16
16
  from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep
17
17
  from .spec import AbstractBufferedFile, AbstractFileSystem
18
- from .utils import is_exception, other_paths
18
+ from .utils import glob_translate, is_exception, other_paths
19
19
 
20
20
  private = re.compile("_[^_]")
21
21
  iothread = [None] # dedicated fsspec IO thread
@@ -106,7 +106,7 @@ def sync(loop, func, *args, timeout=None, **kwargs):
106
106
 
107
107
 
108
108
  def sync_wrapper(func, obj=None):
109
- """Given a function, make so can be called in async or blocking contexts
109
+ """Given a function, make so can be called in blocking contexts
110
110
 
111
111
  Leave obj=None if defining within a class. Pass the instance if attaching
112
112
  as an attribute of the instance.
@@ -205,7 +205,7 @@ def running_async() -> bool:
205
205
  async def _run_coros_in_chunks(
206
206
  coros,
207
207
  batch_size=None,
208
- callback=_DEFAULT_CALLBACK,
208
+ callback=DEFAULT_CALLBACK,
209
209
  timeout=None,
210
210
  return_exceptions=False,
211
211
  nofiles=False,
@@ -245,7 +245,7 @@ async def _run_coros_in_chunks(
245
245
  asyncio.Task(asyncio.wait_for(c, timeout=timeout))
246
246
  for c in coros[start : start + batch_size]
247
247
  ]
248
- if callback is not _DEFAULT_CALLBACK:
248
+ if callback is not DEFAULT_CALLBACK:
249
249
  [
250
250
  t.add_done_callback(lambda *_, **__: callback.relative_update(1))
251
251
  for t in chunk
@@ -467,6 +467,16 @@ class AsyncFileSystem(AbstractFileSystem):
467
467
  on_error="return",
468
468
  **kwargs,
469
469
  ):
470
+ """Get the contents of byte ranges from one or more files
471
+
472
+ Parameters
473
+ ----------
474
+ paths: list
475
+ A list of of filepaths on this filesystems
476
+ starts, ends: int or list
477
+ Bytes limits of the read. If using a single int, the same value will be
478
+ used to read all the specified files.
479
+ """
470
480
  # TODO: on_error
471
481
  if max_gap is not None:
472
482
  # use utils.merge_offset_ranges
@@ -476,7 +486,7 @@ class AsyncFileSystem(AbstractFileSystem):
476
486
  if not isinstance(starts, Iterable):
477
487
  starts = [starts] * len(paths)
478
488
  if not isinstance(ends, Iterable):
479
- ends = [starts] * len(paths)
489
+ ends = [ends] * len(paths)
480
490
  if len(starts) != len(paths) or len(ends) != len(paths):
481
491
  raise ValueError
482
492
  coros = [
@@ -496,7 +506,7 @@ class AsyncFileSystem(AbstractFileSystem):
496
506
  lpath,
497
507
  rpath,
498
508
  recursive=False,
499
- callback=_DEFAULT_CALLBACK,
509
+ callback=DEFAULT_CALLBACK,
500
510
  batch_size=None,
501
511
  maxdepth=None,
502
512
  **kwargs,
@@ -558,8 +568,8 @@ class AsyncFileSystem(AbstractFileSystem):
558
568
  coros = []
559
569
  callback.set_size(len(file_pairs))
560
570
  for lfile, rfile in file_pairs:
561
- callback.branch(lfile, rfile, kwargs)
562
- coros.append(self._put_file(lfile, rfile, **kwargs))
571
+ put_file = callback.branch_coro(self._put_file)
572
+ coros.append(put_file(lfile, rfile, **kwargs))
563
573
 
564
574
  return await _run_coros_in_chunks(
565
575
  coros, batch_size=batch_size, callback=callback
@@ -573,7 +583,7 @@ class AsyncFileSystem(AbstractFileSystem):
573
583
  rpath,
574
584
  lpath,
575
585
  recursive=False,
576
- callback=_DEFAULT_CALLBACK,
586
+ callback=DEFAULT_CALLBACK,
577
587
  maxdepth=None,
578
588
  **kwargs,
579
589
  ):
@@ -635,8 +645,8 @@ class AsyncFileSystem(AbstractFileSystem):
635
645
  coros = []
636
646
  callback.set_size(len(lpaths))
637
647
  for lpath, rpath in zip(lpaths, rpaths):
638
- callback.branch(rpath, lpath, kwargs)
639
- coros.append(self._get_file(rpath, lpath, **kwargs))
648
+ get_file = callback.branch_coro(self._get_file)
649
+ coros.append(get_file(rpath, lpath, **kwargs))
640
650
  return await _run_coros_in_chunks(
641
651
  coros, batch_size=batch_size, callback=callback
642
652
  )
@@ -662,9 +672,9 @@ class AsyncFileSystem(AbstractFileSystem):
662
672
  [self._size(p) for p in paths], batch_size=batch_size
663
673
  )
664
674
 
665
- async def _exists(self, path):
675
+ async def _exists(self, path, **kwargs):
666
676
  try:
667
- await self._info(path)
677
+ await self._info(path, **kwargs)
668
678
  return True
669
679
  except FileNotFoundError:
670
680
  return False
@@ -735,8 +745,12 @@ class AsyncFileSystem(AbstractFileSystem):
735
745
 
736
746
  import re
737
747
 
738
- ends = path.endswith("/")
748
+ seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
749
+ ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash
739
750
  path = self._strip_protocol(path)
751
+ append_slash_to_dirname = ends_with_sep or path.endswith(
752
+ tuple(sep + "**" for sep in seps)
753
+ )
740
754
  idx_star = path.find("*") if path.find("*") >= 0 else len(path)
741
755
  idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
742
756
  idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
@@ -746,11 +760,11 @@ class AsyncFileSystem(AbstractFileSystem):
746
760
  detail = kwargs.pop("detail", False)
747
761
 
748
762
  if not has_magic(path):
749
- if await self._exists(path):
763
+ if await self._exists(path, **kwargs):
750
764
  if not detail:
751
765
  return [path]
752
766
  else:
753
- return {path: await self._info(path)}
767
+ return {path: await self._info(path, **kwargs)}
754
768
  else:
755
769
  if not detail:
756
770
  return [] # glob of non-existent returns empty
@@ -775,46 +789,22 @@ class AsyncFileSystem(AbstractFileSystem):
775
789
  allpaths = await self._find(
776
790
  root, maxdepth=depth, withdirs=True, detail=True, **kwargs
777
791
  )
778
- # Escape characters special to python regex, leaving our supported
779
- # special characters in place.
780
- # See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
781
- # for shell globbing details.
782
- pattern = (
783
- "^"
784
- + (
785
- path.replace("\\", r"\\")
786
- .replace(".", r"\.")
787
- .replace("+", r"\+")
788
- .replace("//", "/")
789
- .replace("(", r"\(")
790
- .replace(")", r"\)")
791
- .replace("|", r"\|")
792
- .replace("^", r"\^")
793
- .replace("$", r"\$")
794
- .replace("{", r"\{")
795
- .replace("}", r"\}")
796
- .rstrip("/")
797
- .replace("?", ".")
798
- )
799
- + "$"
800
- )
801
- pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern)
802
- pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern)
803
- pattern = re.sub("[*]", "[^/]*", pattern)
804
- pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern)
805
- pattern = re.sub("=DOUBLE_STARS=", ".*", pattern)
792
+
793
+ pattern = glob_translate(path + ("/" if ends_with_sep else ""))
806
794
  pattern = re.compile(pattern)
795
+
807
796
  out = {
808
- p: allpaths[p]
809
- for p in sorted(allpaths)
810
- if pattern.match(p.replace("//", "/").rstrip("/"))
797
+ p: info
798
+ for p, info in sorted(allpaths.items())
799
+ if pattern.match(
800
+ (
801
+ p + "/"
802
+ if append_slash_to_dirname and info["type"] == "directory"
803
+ else p
804
+ )
805
+ )
811
806
  }
812
807
 
813
- # Return directories only when the glob end by a slash
814
- # This is needed for posix glob compliance
815
- if ends:
816
- out = {k: v for k, v in out.items() if v["type"] == "directory"}
817
-
818
808
  if detail:
819
809
  return out
820
810
  else:
fsspec/caching.py CHANGED
@@ -111,7 +111,7 @@ class MMapCache(BaseCache):
111
111
  fd.write(b"1")
112
112
  fd.flush()
113
113
  else:
114
- fd = open(self.location, "rb+")
114
+ fd = open(self.location, "r+b")
115
115
 
116
116
  return mmap.mmap(fd.fileno(), self.size)
117
117
 
fsspec/callbacks.py CHANGED
@@ -1,3 +1,6 @@
1
+ from functools import wraps
2
+
3
+
1
4
  class Callback:
2
5
  """
3
6
  Base class and interface for callback mechanism
@@ -25,6 +28,60 @@ class Callback:
25
28
  self.hooks = hooks or {}
26
29
  self.kw = kwargs
27
30
 
31
+ def __enter__(self):
32
+ return self
33
+
34
+ def __exit__(self, *exc_args):
35
+ self.close()
36
+
37
+ def close(self):
38
+ """Close callback."""
39
+
40
+ def branched(self, path_1, path_2, **kwargs):
41
+ """
42
+ Return callback for child transfers
43
+
44
+ If this callback is operating at a higher level, e.g., put, which may
45
+ trigger transfers that can also be monitored. The function returns a callback
46
+ that has to be passed to the child method, e.g., put_file,
47
+ as `callback=` argument.
48
+
49
+ The implementation uses `callback.branch` for compatibility.
50
+ When implementing callbacks, it is recommended to override this function instead
51
+ of `branch` and avoid calling `super().branched(...)`.
52
+
53
+ Prefer using this function over `branch`.
54
+
55
+ Parameters
56
+ ----------
57
+ path_1: str
58
+ Child's source path
59
+ path_2: str
60
+ Child's destination path
61
+ **kwargs:
62
+ Arbitrary keyword arguments
63
+
64
+ Returns
65
+ -------
66
+ callback: Callback
67
+ A callback instance to be passed to the child method
68
+ """
69
+ self.branch(path_1, path_2, kwargs)
70
+ # mutate kwargs so that we can force the caller to pass "callback=" explicitly
71
+ return kwargs.pop("callback", DEFAULT_CALLBACK)
72
+
73
+ def branch_coro(self, fn):
74
+ """
75
+ Wraps a coroutine, and pass a new child callback to it.
76
+ """
77
+
78
+ @wraps(fn)
79
+ async def func(path1, path2: str, **kwargs):
80
+ with self.branched(path1, path2, **kwargs) as child:
81
+ return await fn(path1, path2, callback=child, **kwargs)
82
+
83
+ return func
84
+
28
85
  def set_size(self, size):
29
86
  """
30
87
  Set the internal maximum size attribute
@@ -140,10 +197,10 @@ class Callback:
140
197
 
141
198
  For the special value of ``None``, return the global instance of
142
199
  ``NoOpCallback``. This is an alternative to including
143
- ``callback=_DEFAULT_CALLBACK`` directly in a method signature.
200
+ ``callback=DEFAULT_CALLBACK`` directly in a method signature.
144
201
  """
145
202
  if maybe_callback is None:
146
- return _DEFAULT_CALLBACK
203
+ return DEFAULT_CALLBACK
147
204
  return maybe_callback
148
205
 
149
206
 
@@ -186,7 +243,9 @@ class TqdmCallback(Callback):
186
243
  tqdm_kwargs : dict, (optional)
187
244
  Any argument accepted by the tqdm constructor.
188
245
  See the `tqdm doc <https://tqdm.github.io/docs/tqdm/#__init__>`_.
189
- Will be forwarded to tqdm.
246
+ Will be forwarded to `tqdm_cls`.
247
+ tqdm_cls: (optional)
248
+ subclass of `tqdm.tqdm`. If not passed, it will default to `tqdm.tqdm`.
190
249
 
191
250
  Examples
192
251
  --------
@@ -209,30 +268,57 @@ class TqdmCallback(Callback):
209
268
  recursive=True,
210
269
  callback=TqdmCallback(tqdm_kwargs={"desc": "Your tqdm description"}),
211
270
  )
271
+
272
+ You can also customize the progress bar by passing a subclass of `tqdm`.
273
+
274
+ .. code-block:: python
275
+
276
+ class TqdmFormat(tqdm):
277
+ '''Provides a `total_time` format parameter'''
278
+ @property
279
+ def format_dict(self):
280
+ d = super().format_dict
281
+ total_time = d["elapsed"] * (d["total"] or 0) / max(d["n"], 1)
282
+ d.update(total_time=self.format_interval(total_time) + " in total")
283
+ return d
284
+
285
+ >>> with TqdmCallback(
286
+ tqdm_kwargs={
287
+ "desc": "desc",
288
+ "bar_format": "{total_time}: {percentage:.0f}%|{bar}{r_bar}",
289
+ },
290
+ tqdm_cls=TqdmFormat,
291
+ ) as callback:
292
+ fs.upload(".", path2distant_data, recursive=True, callback=callback)
212
293
  """
213
294
 
214
295
  def __init__(self, tqdm_kwargs=None, *args, **kwargs):
215
296
  try:
216
- import tqdm
297
+ from tqdm import tqdm
217
298
 
218
- self._tqdm = tqdm
219
299
  except ImportError as exce:
220
300
  raise ImportError(
221
301
  "Using TqdmCallback requires tqdm to be installed"
222
302
  ) from exce
223
303
 
304
+ self._tqdm_cls = kwargs.pop("tqdm_cls", tqdm)
224
305
  self._tqdm_kwargs = tqdm_kwargs or {}
306
+ self.tqdm = None
225
307
  super().__init__(*args, **kwargs)
226
308
 
227
- def set_size(self, size):
228
- self.tqdm = self._tqdm.tqdm(total=size, **self._tqdm_kwargs)
309
+ def call(self, *args, **kwargs):
310
+ if self.tqdm is None:
311
+ self.tqdm = self._tqdm_cls(total=self.size, **self._tqdm_kwargs)
312
+ self.tqdm.total = self.size
313
+ self.tqdm.update(self.value - self.tqdm.n)
229
314
 
230
- def relative_update(self, inc=1):
231
- self.tqdm.update(inc)
315
+ def close(self):
316
+ if self.tqdm is not None:
317
+ self.tqdm.close()
318
+ self.tqdm = None
232
319
 
233
320
  def __del__(self):
234
- self.tqdm.close()
235
- self.tqdm = None
321
+ return self.close()
236
322
 
237
323
 
238
- _DEFAULT_CALLBACK = NoOpCallback()
324
+ DEFAULT_CALLBACK = _DEFAULT_CALLBACK = NoOpCallback()
fsspec/compression.py CHANGED
@@ -90,15 +90,15 @@ except ImportError:
90
90
  try:
91
91
  from lzma import LZMAFile
92
92
 
93
- register_compression("lzma", LZMAFile, "xz")
94
- register_compression("xz", LZMAFile, "xz", force=True)
93
+ register_compression("lzma", LZMAFile, "lzma")
94
+ register_compression("xz", LZMAFile, "xz")
95
95
  except ImportError:
96
96
  pass
97
97
 
98
98
  try:
99
99
  import lzmaffi
100
100
 
101
- register_compression("lzma", lzmaffi.LZMAFile, "xz", force=True)
101
+ register_compression("lzma", lzmaffi.LZMAFile, "lzma", force=True)
102
102
  register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
103
103
  except ImportError:
104
104
  pass
fsspec/core.py CHANGED
@@ -1,8 +1,11 @@
1
+ from __future__ import annotations
2
+
1
3
  import io
2
4
  import logging
3
5
  import os
4
6
  import re
5
7
  from glob import has_magic
8
+ from pathlib import Path
6
9
 
7
10
  # for backwards compat, we export cache things from here too
8
11
  from .caching import ( # noqa: F401
@@ -290,7 +293,11 @@ def open_files(
290
293
  fs.auto_mkdir = auto_mkdir
291
294
  elif "r" not in mode and auto_mkdir:
292
295
  parents = {fs._parent(path) for path in paths}
293
- [fs.makedirs(parent, exist_ok=True) for parent in parents]
296
+ for parent in parents:
297
+ try:
298
+ fs.makedirs(parent, exist_ok=True)
299
+ except PermissionError:
300
+ pass
294
301
  return OpenFiles(
295
302
  [
296
303
  OpenFile(
@@ -465,7 +472,11 @@ def open(
465
472
  return out[0]
466
473
 
467
474
 
468
- def open_local(url, mode="rb", **storage_options):
475
+ def open_local(
476
+ url: str | list[str] | Path | list[Path],
477
+ mode: str = "rb",
478
+ **storage_options: dict,
479
+ ) -> str | list[str]:
469
480
  """Open file(s) which can be resolved to local
470
481
 
471
482
  For files which either are local, or get downloaded upon open
@@ -489,7 +500,7 @@ def open_local(url, mode="rb", **storage_options):
489
500
  )
490
501
  with of as files:
491
502
  paths = [f.name for f in files]
492
- if isinstance(url, str) and not has_magic(url):
503
+ if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path):
493
504
  return paths[0]
494
505
  return paths
495
506
 
@@ -510,6 +521,8 @@ def split_protocol(urlpath):
510
521
  if len(protocol) > 1:
511
522
  # excludes Windows paths
512
523
  return protocol, path
524
+ if urlpath.startswith("data:"):
525
+ return urlpath.split(":", 1)
513
526
  return None, urlpath
514
527
 
515
528
 
fsspec/exceptions.py CHANGED
@@ -10,12 +10,8 @@ class BlocksizeMismatchError(ValueError):
10
10
  written with
11
11
  """
12
12
 
13
- ...
14
-
15
13
 
16
14
  class FSTimeoutError(asyncio.TimeoutError):
17
15
  """
18
16
  Raised when a fsspec function timed out occurs
19
17
  """
20
-
21
- ...
fsspec/generic.py CHANGED
@@ -8,7 +8,7 @@ import uuid
8
8
  from typing import Optional
9
9
 
10
10
  from .asyn import AsyncFileSystem, _run_coros_in_chunks, sync_wrapper
11
- from .callbacks import _DEFAULT_CALLBACK
11
+ from .callbacks import DEFAULT_CALLBACK
12
12
  from .core import filesystem, get_filesystem_class, split_protocol, url_to_fs
13
13
 
14
14
  _generic_fs = {}
@@ -171,6 +171,10 @@ class GenericFileSystem(AsyncFileSystem):
171
171
  self.method = default_method
172
172
  super().__init__(**kwargs)
173
173
 
174
+ def _parent(self, path):
175
+ fs = _resolve_fs(path, self.method)
176
+ return fs.unstrip_protocol(fs._parent(path))
177
+
174
178
  def _strip_protocol(self, path):
175
179
  # normalization only
176
180
  fs = _resolve_fs(path, self.method)
@@ -246,9 +250,12 @@ class GenericFileSystem(AsyncFileSystem):
246
250
  return fs.pipe_file(path, value, **kwargs)
247
251
 
248
252
  async def _rm(self, url, **kwargs):
249
- fs = _resolve_fs(url, self.method)
253
+ urls = url
254
+ if isinstance(urls, str):
255
+ urls = [urls]
256
+ fs = _resolve_fs(urls[0], self.method)
250
257
  if fs.async_impl:
251
- await fs._rm(url, **kwargs)
258
+ await fs._rm(urls, **kwargs)
252
259
  else:
253
260
  fs.rm(url, **kwargs)
254
261
 
@@ -272,7 +279,7 @@ class GenericFileSystem(AsyncFileSystem):
272
279
  url,
273
280
  url2,
274
281
  blocksize=2**20,
275
- callback=_DEFAULT_CALLBACK,
282
+ callback=DEFAULT_CALLBACK,
276
283
  **kwargs,
277
284
  ):
278
285
  fs = _resolve_fs(url, self.method)
fsspec/gui.py CHANGED
@@ -153,8 +153,9 @@ class SigSlot:
153
153
  break
154
154
  except Exception as e:
155
155
  logger.exception(
156
- "Exception (%s) while executing callback for signal: %s"
157
- "" % (e, sig)
156
+ "Exception (%s) while executing callback for signal: %s",
157
+ e,
158
+ sig,
158
159
  )
159
160
 
160
161
  def show(self, threads=False):
@@ -242,7 +243,7 @@ class FileSelector(SigSlot):
242
243
  else:
243
244
  self.init_protocol, url = "file", os.getcwd()
244
245
  self.init_url = url
245
- self.init_kwargs = kwargs or "{}"
246
+ self.init_kwargs = (kwargs if isinstance(kwargs, str) else str(kwargs)) or "{}"
246
247
  self.filters = filters
247
248
  self.ignore = [re.compile(i) for i in ignore or []]
248
249
  self._fs = None
fsspec/implementations/arrow.py CHANGED
@@ -5,6 +5,7 @@ import secrets
5
5
  import shutil
6
6
  from contextlib import suppress
7
7
  from functools import cached_property, wraps
8
+ from urllib.parse import parse_qs
8
9
 
9
10
  from fsspec.spec import AbstractFileSystem
10
11
  from fsspec.utils import (
@@ -255,6 +256,7 @@ class HadoopFileSystem(ArrowFSWrapper):
255
256
  port=0,
256
257
  user=None,
257
258
  kerb_ticket=None,
259
+ replication=3,
258
260
  extra_conf=None,
259
261
  **kwargs,
260
262
  ):
@@ -270,6 +272,8 @@ class HadoopFileSystem(ArrowFSWrapper):
270
272
  If given, connect as this username
271
273
  kerb_ticket: str or None
272
274
  If given, use this ticket for authentication
275
+ replication: int
276
+ set replication factor of file for write operations. default value is 3.
273
277
  extra_conf: None or dict
274
278
  Passed on to HadoopFileSystem
275
279
  """
@@ -280,6 +284,7 @@ class HadoopFileSystem(ArrowFSWrapper):
280
284
  port=port,
281
285
  user=user,
282
286
  kerb_ticket=kerb_ticket,
287
+ replication=replication,
283
288
  extra_conf=extra_conf,
284
289
  )
285
290
  super().__init__(fs=fs, **kwargs)
@@ -294,4 +299,8 @@ class HadoopFileSystem(ArrowFSWrapper):
294
299
  out["user"] = ops["username"]
295
300
  if ops.get("port", None):
296
301
  out["port"] = ops["port"]
302
+ if ops.get("url_query", None):
303
+ queries = parse_qs(ops["url_query"])
304
+ if queries.get("replication", None):
305
+ out["replication"] = int(queries["replication"][0])
297
306
  return out
fsspec/implementations/cache_mapper.py CHANGED
@@ -2,13 +2,9 @@ from __future__ import annotations
2
2
 
3
3
  import abc
4
4
  import hashlib
5
- from typing import TYPE_CHECKING
6
5
 
7
6
  from fsspec.implementations.local import make_path_posix
8
7
 
9
- if TYPE_CHECKING:
10
- from typing import Any
11
-
12
8
 
13
9
  class AbstractCacheMapper(abc.ABC):
14
10
  """Abstract super-class for mappers from remote URLs to local cached
@@ -19,7 +15,7 @@ class AbstractCacheMapper(abc.ABC):
19
15
  def __call__(self, path: str) -> str:
20
16
  ...
21
17
 
22
- def __eq__(self, other: Any) -> bool:
18
+ def __eq__(self, other: object) -> bool:
23
19
  # Identity only depends on class. When derived classes have attributes
24
20
  # they will need to be included.
25
21
  return isinstance(other, type(self))
@@ -56,7 +52,7 @@ class BasenameCacheMapper(AbstractCacheMapper):
56
52
  else:
57
53
  return prefix # No separator found, simple filename
58
54
 
59
- def __eq__(self, other: Any) -> bool:
55
+ def __eq__(self, other: object) -> bool:
60
56
  return super().__eq__(other) and self.directory_levels == other.directory_levels
61
57
 
62
58
  def __hash__(self) -> int: