fsspec 2023.12.2__py3-none-any.whl → 2024.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,11 +7,10 @@ from copy import copy
 from urllib.parse import urlparse
 
 import aiohttp
-import requests
 import yarl
 
 from fsspec.asyn import AbstractAsyncStreamedFile, AsyncFileSystem, sync, sync_wrapper
-from fsspec.callbacks import _DEFAULT_CALLBACK
+from fsspec.callbacks import DEFAULT_CALLBACK
 from fsspec.exceptions import FSTimeoutError
 from fsspec.spec import AbstractBufferedFile
 from fsspec.utils import (
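Note (illustrative): `_DEFAULT_CALLBACK` is renamed to the public `DEFAULT_CALLBACK` throughout this release. A minimal compatibility sketch for downstream code that imported the old private name, assuming nothing beyond the rename shown above:

    try:
        from fsspec.callbacks import DEFAULT_CALLBACK  # fsspec >= 2024.2.0
    except ImportError:
        # older releases such as 2023.12.2 only expose the private alias
        from fsspec.callbacks import _DEFAULT_CALLBACK as DEFAULT_CALLBACK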
@@ -124,7 +123,7 @@ class HTTPFileSystem(AsyncFileSystem):
             try:
                 sync(loop, session.close, timeout=0.1)
                 return
-            except (TimeoutError, FSTimeoutError):
+            except (TimeoutError, FSTimeoutError, NotImplementedError):
                 pass
         connector = getattr(session, "_connector", None)
         if connector is not None:
@@ -235,7 +234,7 @@ class HTTPFileSystem(AsyncFileSystem):
         return out
 
     async def _get_file(
-        self, rpath, lpath, chunk_size=5 * 2**20, callback=_DEFAULT_CALLBACK, **kwargs
+        self, rpath, lpath, chunk_size=5 * 2**20, callback=DEFAULT_CALLBACK, **kwargs
     ):
         kw = self.kwargs.copy()
         kw.update(kwargs)
@@ -252,7 +251,7 @@ class HTTPFileSystem(AsyncFileSystem):
         if isfilelike(lpath):
             outfile = lpath
         else:
-            outfile = open(lpath, "wb")
+            outfile = open(lpath, "wb")  # noqa: ASYNC101
 
         try:
             chunk = True
@@ -269,7 +268,7 @@ class HTTPFileSystem(AsyncFileSystem):
         lpath,
         rpath,
         chunk_size=5 * 2**20,
-        callback=_DEFAULT_CALLBACK,
+        callback=DEFAULT_CALLBACK,
         method="post",
         **kwargs,
     ):
@@ -280,7 +279,7 @@ class HTTPFileSystem(AsyncFileSystem):
             context = nullcontext(lpath)
             use_seek = False  # might not support seeking
         else:
-            context = open(lpath, "rb")
+            context = open(lpath, "rb")  # noqa: ASYNC101
             use_seek = True
 
         with context as f:
@@ -319,7 +318,7 @@ class HTTPFileSystem(AsyncFileSystem):
             r = await session.get(self.encode_url(path), **kw)
             async with r:
                 return r.status < 400
-        except (requests.HTTPError, aiohttp.ClientError):
+        except aiohttp.ClientError:
             return False
 
     async def _isfile(self, path, **kwargs):
@@ -529,7 +528,7 @@ class HTTPFile(AbstractBufferedFile):
    ----------
    url: str
        Full URL of the remote resource, including the protocol
-    session: requests.Session or None
+    session: aiohttp.ClientSession or None
        All calls will be made within this session, to avoid restarting
        connections where the server allows this
    block_size: int or None
@@ -802,7 +801,7 @@ async def get_range(session, url, start, end, file=None, **kwargs):
     async with r:
         out = await r.read()
         if file:
-            with open(file, "r+b") as f:
+            with open(file, "r+b") as f:  # noqa: ASYNC101
                 f.seek(start)
                 f.write(out)
         else:
@@ -847,6 +846,11 @@ async def _file_info(url, session, size_policy="head", **kwargs):
     elif "Content-Range" in r.headers:
         info["size"] = int(r.headers["Content-Range"].split("/")[1])
 
+    if "Content-Type" in r.headers:
+        info["mimetype"] = r.headers["Content-Type"].partition(";")[0]
+
+    info["url"] = str(r.url)
+
     for checksum_field in ["ETag", "Content-MD5", "Digest"]:
         if r.headers.get(checksum_field):
             info[checksum_field] = r.headers[checksum_field]
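With these additions, the info reported for HTTP paths can carry the response's media type and the resolved URL after redirects. A hedged usage sketch (the URL is hypothetical):

    import fsspec

    fs = fsspec.filesystem("http")
    info = fs.info("https://example.com/data.csv")  # hypothetical URL
    # new keys alongside "size": Content-Type without parameters, final URL
    print(info.get("mimetype"), info.get("url"))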
@@ -3,7 +3,6 @@ import io
 import logging
 import os
 import os.path as osp
-import posixpath
 import re
 import shutil
 import stat
@@ -59,11 +58,16 @@ class LocalFileSystem(AbstractFileSystem):
 
     def ls(self, path, detail=False, **kwargs):
         path = self._strip_protocol(path)
-        if detail:
+        info = self.info(path)
+        if info["type"] == "directory":
             with os.scandir(path) as it:
-                return [self.info(f) for f in it]
+                infos = [self.info(f) for f in it]
         else:
-            return [posixpath.join(path, f) for f in os.listdir(path)]
+            infos = [info]
+
+        if not detail:
+            return [i["name"] for i in infos]
+        return infos
 
     def info(self, path, **kwargs):
         if isinstance(path, os.DirEntry):
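The rewritten `ls` routes both files and directories through `info()`, so listing a file path yields a single entry and `detail=False` returns names only. A small sketch (paths hypothetical):

    import fsspec

    fs = fsspec.filesystem("file")
    fs.ls("/tmp")                          # names only (detail defaults to False)
    fs.ls("/tmp/report.csv", detail=True)  # a one-element list of info dicts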
@@ -8,7 +8,7 @@ from typing import Any, ClassVar
 
 from fsspec import AbstractFileSystem
 
-logger = logging.Logger("fsspec.memoryfs")
+logger = logging.getLogger("fsspec.memoryfs")
 
 
 class MemoryFileSystem(AbstractFileSystem):
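Using `logging.getLogger` registers the logger with the standard logging hierarchy instead of creating a detached instance, so configuration such as the following now takes effect (illustrative):

    import logging

    # attaches to the same "fsspec.memoryfs" logger the module now uses
    logging.getLogger("fsspec.memoryfs").setLevel(logging.DEBUG)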
@@ -17,7 +17,7 @@ except ImportError:
     import json
 
 from ..asyn import AsyncFileSystem
-from ..callbacks import _DEFAULT_CALLBACK
+from ..callbacks import DEFAULT_CALLBACK
 from ..core import filesystem, open, split_protocol
 from ..utils import isfilelike, merge_offset_ranges, other_paths
 
@@ -106,6 +106,12 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
         self, root, fs=None, out_root=None, cache_size=128, categorical_threshold=10
     ):
         """
+
+        This instance will be writable, storing changes in memory until full partitions
+        are accumulated or .flush() is called.
+
+        To create an empty lazy store, use .create()
+
         Parameters
         ----------
         root : str
@@ -119,26 +125,35 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
             Encode urls as pandas.Categorical to reduce memory footprint if the ratio
             of the number of unique urls to total number of refs for each variable
             is greater than or equal to this number. (default 10)
-
-
         """
         self.root = root
         self.chunk_sizes = {}
-        self._items = {}
+        self.out_root = out_root or self.root
+        self.cat_thresh = categorical_threshold
+        self.cache_size = cache_size
         self.dirs = None
+        self.url = self.root + "/{field}/refs.{record}.parq"
+        # TODO: derive fs from `root`
         self.fs = fsspec.filesystem("file") if fs is None else fs
+
+    def __getattr__(self, item):
+        if item in ("_items", "record_size", "zmetadata"):
+            self.setup()
+            # avoid possible recursion if setup fails somehow
+            return self.__dict__[item]
+        raise AttributeError(item)
+
+    def setup(self):
+        self._items = {}
         self._items[".zmetadata"] = self.fs.cat_file(
             "/".join([self.root, ".zmetadata"])
         )
         met = json.loads(self._items[".zmetadata"])
         self.record_size = met["record_size"]
         self.zmetadata = met["metadata"]
-        self.url = self.root + "/{field}/refs.{record}.parq"
-        self.out_root = out_root or self.root
-        self.cat_thresh = categorical_threshold
 
         # Define function to open and decompress refs
-        @lru_cache(maxsize=cache_size)
+        @lru_cache(maxsize=self.cache_size)
         def open_refs(field, record):
             """cached parquet file loader"""
             path = self.url.format(field=field, record=record)
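Construction is now lazy: `_items`, `record_size` and `zmetadata` are only populated by `setup()` on first attribute access via `__getattr__`. A hedged sketch of the effect (directory name hypothetical):

    from fsspec.implementations.reference import LazyReferenceMapper

    refs = LazyReferenceMapper("refs.parq")  # no .zmetadata read happens here
    meta = refs.zmetadata                    # first access triggers setup() and the read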
@@ -153,6 +168,8 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
     def create(root, storage_options=None, fs=None, record_size=10000, **kwargs):
         """Make empty parquet reference set
 
+        First deletes the contents of the given directory, if it exists.
+
         Parameters
         ----------
         root: str
@@ -172,12 +189,15 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
         met = {"metadata": {}, "record_size": record_size}
         if fs is None:
             fs, root = fsspec.core.url_to_fs(root, **(storage_options or {}))
+        if fs.exists(root):
+            fs.rm(root, recursive=True)
         fs.makedirs(root, exist_ok=True)
         fs.pipe("/".join([root, ".zmetadata"]), json.dumps(met).encode())
         return LazyReferenceMapper(root, fs, **kwargs)
 
     def listdir(self, basename=True):
         """List top-level directories"""
+        # cache me?
         if self.dirs is None:
             dirs = [p.split("/", 1)[0] for p in self.zmetadata]
             self.dirs = {p for p in dirs if p and not p.startswith(".")}
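`create()` is now destructive for an existing target directory, matching the docstring addition above. A minimal sketch (path hypothetical):

    from fsspec.implementations.reference import LazyReferenceMapper

    # removes "refs.parq" recursively if it already exists, then writes .zmetadata
    refs = LazyReferenceMapper.create("refs.parq", record_size=10000)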
@@ -258,19 +278,18 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
         elif "/" not in key or self._is_meta(key):
             raise KeyError(key)
         field, sub_key = key.split("/")
-        record, _, _ = self._key_to_record(key)
-        maybe = self._items.get((field, key), {}).get(sub_key, False)
+        record, ri, chunk_size = self._key_to_record(key)
+        maybe = self._items.get((field, record), {}).get(ri, False)
         if maybe is None:
             # explicitly deleted
             raise KeyError
         elif maybe:
             return maybe
+        elif chunk_size == 0:
+            return b""
 
         # Chunk keys can be loaded from row group and cached in LRU cache
         try:
-            record, ri, chunk_size = self._key_to_record(key)
-            if chunk_size == 0:
-                return b""
             refs = self.open_refs(field, record)
         except (ValueError, TypeError, FileNotFoundError):
             raise KeyError(key)
@@ -280,7 +299,7 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
         if raw is not None:
             return raw
         if selection[0] is None:
-            raise KeyError("This reference has been deleted")
+            raise KeyError("This reference does not exist or has been deleted")
         if selection[1:3] == [0, 0]:
             # URL only
             return selection[:1]
@@ -307,7 +326,7 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
             size_ratio = [
                 math.ceil(s / c) for s, c in zip(zarray["shape"], zarray["chunks"])
             ]
-            self.chunk_sizes[field] = size_ratio
+            self.chunk_sizes[field] = size_ratio or [1]
         return self.chunk_sizes[field]
 
     def _generate_record(self, field, record):
@@ -342,7 +361,6 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
     def __hash__(self):
         return id(self)
 
-    @lru_cache(20)
     def __getitem__(self, key):
         return self._load_one_key(key)
 
@@ -357,9 +375,10 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
         else:
             # metadata or top-level
             self._items[key] = value
-            self.zmetadata[key] = json.loads(
+            new_value = json.loads(
                 value.decode() if isinstance(value, bytes) else value
             )
+            self.zmetadata[key] = {**self.zmetadata.get(key, {}), **new_value}
 
     @staticmethod
     def _is_meta(key):
@@ -373,9 +392,9 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
         else:
             if "/" in key and not self._is_meta(key):
                 field, chunk = key.split("/")
-                record, _, _ = self._key_to_record(key)
+                record, i, _ = self._key_to_record(key)
                 subdict = self._items.setdefault((field, record), {})
-                subdict[chunk] = None
+                subdict[i] = None
                 if len(subdict) == self.record_size:
                     self.write(field, record)
             else:
@@ -388,26 +407,43 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
         import numpy as np
         import pandas as pd
 
-        # TODO: if the dict is incomplete, also load records and merge in
         partition = self._items[(field, record)]
-        fn = f"{base_url or self.out_root}/{field}/refs.{record}.parq"
+        original = False
+        if len(partition) < self.record_size:
+            try:
+                original = self.open_refs(field, record)
+            except IOError:
+                pass
 
-        ####
-        paths = np.full(self.record_size, np.nan, dtype="O")
-        offsets = np.zeros(self.record_size, dtype="int64")
-        sizes = np.zeros(self.record_size, dtype="int64")
-        raws = np.full(self.record_size, np.nan, dtype="O")
-        nraw = 0
-        npath = 0
+        if original:
+            paths = original["path"]
+            offsets = original["offset"]
+            sizes = original["size"]
+            raws = original["raw"]
+        else:
+            paths = np.full(self.record_size, np.nan, dtype="O")
+            offsets = np.zeros(self.record_size, dtype="int64")
+            sizes = np.zeros(self.record_size, dtype="int64")
+            raws = np.full(self.record_size, np.nan, dtype="O")
         for j, data in partition.items():
             if isinstance(data, list):
-                npath += 1
+                if (
+                    str(paths.dtype) == "category"
+                    and data[0] not in paths.dtype.categories
+                ):
+                    paths = paths.add_categories(data[0])
                 paths[j] = data[0]
                 if len(data) > 1:
                     offsets[j] = data[1]
                     sizes[j] = data[2]
+            elif data is None:
+                # delete
+                paths[j] = None
+                offsets[j] = 0
+                sizes[j] = 0
+                raws[j] = None
             else:
-                nraw += 1
+                # this is the only call into kerchunk, could remove
                 raws[j] = kerchunk.df._proc_raw(data)
         # TODO: only save needed columns
         df = pd.DataFrame(
@@ -424,6 +460,7 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
         object_encoding = {"raw": "bytes", "path": "utf8"}
         has_nulls = ["path", "raw"]
 
+        fn = f"{base_url or self.out_root}/{field}/refs.{record}.parq"
         self.fs.mkdirs(f"{base_url or self.out_root}/{field}", exist_ok=True)
         df.to_parquet(
             fn,
@@ -474,29 +511,30 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
         self.open_refs.cache_clear()
 
     def __len__(self):
-        # Caveat: This counts expected references, not actual
+        # Caveat: This counts expected references, not actual - but is fast
         count = 0
         for field in self.listdir():
             if field.startswith("."):
                 count += 1
             else:
-                chunk_sizes = self._get_chunk_sizes(field)
-                nchunks = self.np.product(chunk_sizes)
-                count += nchunks
+                count += math.prod(self._get_chunk_sizes(field))
         count += len(self.zmetadata)  # all metadata keys
-        count += len(self._items)  # the metadata file itself
+        # any other files not in reference partitions
+        count += sum(1 for _ in self._items if not isinstance(_, tuple))
         return count
 
     def __iter__(self):
-        # Caveat: Note that this generates all expected keys, but does not
-        # account for reference keys that are missing.
+        # Caveat: returns only existing keys, so the number of these does not
+        # match len(self)
         metas = set(self.zmetadata)
         metas.update(self._items)
         for bit in metas:
             if isinstance(bit, str):
                 yield bit
         for field in self.listdir():
-            yield from self._keys_in_field(field)
+            for k in self._keys_in_field(field):
+                if k in self:
+                    yield k
 
     def __contains__(self, item):
         try:
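`__len__` now computes the expected chunk count with the standard library instead of the deprecated `np.product`. A tiny illustrative check:

    import math

    # expected number of chunks for a variable chunked into 3 x 4 x 5 pieces
    assert math.prod([3, 4, 5]) == 60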
@@ -762,7 +800,7 @@ class ReferenceFileSystem(AsyncFileSystem):
         with open(lpath, "wb") as f:
             f.write(data)
 
-    def get_file(self, rpath, lpath, callback=_DEFAULT_CALLBACK, **kwargs):
+    def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, **kwargs):
         if self.isdir(rpath):
             return os.makedirs(lpath, exist_ok=True)
         data = self.cat_file(rpath, **kwargs)
@@ -1101,7 +1139,7 @@ class ReferenceFileSystem(AsyncFileSystem):
         self.references[path] = data
         self.dircache.clear()  # this is a bit heavy handed
 
-    async def _put_file(self, lpath, rpath):
+    async def _put_file(self, lpath, rpath, **kwargs):
         # puts binary
         with open(lpath, "rb") as f:
             self.references[rpath] = f.read()
@@ -65,7 +65,7 @@ class SFTPFileSystem(AbstractFileSystem):
         out.pop("protocol", None)
         return out
 
-    def mkdir(self, path, create_parents=False, mode=511):
+    def mkdir(self, path, create_parents=True, mode=511):
         logger.debug("Creating folder %s", path)
         if self.exists(path):
             raise FileExistsError(f"File exists: {path}")
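`mkdir` on SFTP now creates missing parent directories by default. A hedged sketch (host, credentials and paths hypothetical):

    import fsspec

    fs = fsspec.filesystem("sftp", host="sftp.example.com", username="user")
    fs.mkdir("/upload/2024/02/run1")                # parents created as needed
    fs.mkdir("/upload/flat", create_parents=False)  # previous, strict behaviour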
@@ -54,6 +54,8 @@ class WebHDFS(AbstractFileSystem):
         kerb_kwargs=None,
         data_proxy=None,
         use_https=False,
+        session_cert=None,
+        session_verify=True,
         **kwargs,
     ):
         """
@@ -90,12 +92,19 @@ class WebHDFS(AbstractFileSystem):
             ``url->data_proxy(url)``.
         use_https: bool
             Whether to connect to the Name-node using HTTPS instead of HTTP
+        session_cert: str or Tuple[str, str] or None
+            Path to a certificate file, or tuple of (cert, key) files to use
+            for the requests.Session
+        session_verify: str, bool or None
+            Path to a certificate file to use for verifying the requests.Session.
         kwargs
         """
         if self._cached:
             return
         super().__init__(**kwargs)
-        self.url = f"{'https' if use_https else 'http'}://{host}:{port}/webhdfs/v1"
+        self.url = (
+            f"{'https' if use_https else 'http'}://{host}:{port}/webhdfs/v1"  # noqa
+        )
         self.kerb = kerberos
         self.kerb_kwargs = kerb_kwargs or {}
         self.pars = {}
@@ -128,6 +137,10 @@ class WebHDFS(AbstractFileSystem):
                 "If using Kerberos auth, do not specify the "
                 "user, this is handled by kinit."
             )
+
+        self.session_cert = session_cert
+        self.session_verify = session_verify
+
         self._connect()
 
         self._fsid = f"webhdfs_{tokenize(host, port)}"
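The new options are stored on the instance here and applied to the `requests.Session` in `_connect` (see the following hunk). A hedged construction sketch (host, port and file paths hypothetical):

    import fsspec

    fs = fsspec.filesystem(
        "webhdfs",
        host="namenode.example.com",
        port=9870,
        use_https=True,
        session_cert=("/etc/ssl/client.crt", "/etc/ssl/client.key"),
        session_verify="/etc/ssl/ca-bundle.crt",
    )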
@@ -138,6 +151,12 @@ class WebHDFS(AbstractFileSystem):
 
     def _connect(self):
         self.session = requests.Session()
+
+        if self.session_cert:
+            self.session.cert = self.session_cert
+
+        self.session.verify = self.session_verify
+
         if self.kerb:
             from requests_kerberos import HTTPKerberosAuth
 
fsspec/parquet.py CHANGED
@@ -131,10 +131,8 @@ def open_parquet_file(
         cache_type="parts",
         cache_options={
             **options,
-            **{
-                "data": data.get(fn, {}),
-                "strict": strict,
-            },
+            "data": data.get(fn, {}),
+            "strict": strict,
         },
         **kwargs,
     )
@@ -338,7 +336,7 @@ def _transfer_ranges(fs, blocks, paths, starts, ends):
 
 def _add_header_magic(data):
     # Add b"PAR1" to file headers
-    for i, path in enumerate(list(data.keys())):
+    for path in list(data.keys()):
         add_magic = True
         for k in data[path].keys():
             if k[0] == 0 and k[1] >= 4:
fsspec/spec.py CHANGED
@@ -11,7 +11,7 @@ from glob import has_magic
 from hashlib import sha256
 from typing import ClassVar
 
-from .callbacks import _DEFAULT_CALLBACK
+from .callbacks import DEFAULT_CALLBACK
 from .config import apply_config, conf
 from .dircache import DirCache
 from .transaction import Transaction
@@ -876,9 +876,7 @@ class AbstractFileSystem(metaclass=_Cached):
         else:
             return self.cat_file(paths[0], **kwargs)
 
-    def get_file(
-        self, rpath, lpath, callback=_DEFAULT_CALLBACK, outfile=None, **kwargs
-    ):
+    def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, outfile=None, **kwargs):
         """Copy single remote file to local"""
         from .implementations.local import LocalFileSystem
 
@@ -913,7 +911,7 @@ class AbstractFileSystem(metaclass=_Cached):
         rpath,
         lpath,
         recursive=False,
-        callback=_DEFAULT_CALLBACK,
+        callback=DEFAULT_CALLBACK,
         maxdepth=None,
         **kwargs,
     ):
@@ -967,10 +965,10 @@ class AbstractFileSystem(metaclass=_Cached):
 
         callback.set_size(len(lpaths))
         for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
-            callback.branch(rpath, lpath, kwargs)
-            self.get_file(rpath, lpath, **kwargs)
+            with callback.branched(rpath, lpath) as child:
+                self.get_file(rpath, lpath, callback=child, **kwargs)
 
-    def put_file(self, lpath, rpath, callback=_DEFAULT_CALLBACK, **kwargs):
+    def put_file(self, lpath, rpath, callback=DEFAULT_CALLBACK, **kwargs):
         """Copy single file to remote"""
         if os.path.isdir(lpath):
             self.makedirs(rpath, exist_ok=True)
@@ -995,7 +993,7 @@ class AbstractFileSystem(metaclass=_Cached):
         lpath,
         rpath,
         recursive=False,
-        callback=_DEFAULT_CALLBACK,
+        callback=DEFAULT_CALLBACK,
         maxdepth=None,
         **kwargs,
     ):
@@ -1053,8 +1051,8 @@ class AbstractFileSystem(metaclass=_Cached):
 
         callback.set_size(len(rpaths))
         for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
-            callback.branch(lpath, rpath, kwargs)
-            self.put_file(lpath, rpath, **kwargs)
+            with callback.branched(lpath, rpath) as child:
+                self.put_file(lpath, rpath, callback=child, **kwargs)
 
     def head(self, path, size=1024):
         """Get the first ``size`` bytes from file"""
@@ -1134,7 +1132,7 @@ class AbstractFileSystem(metaclass=_Cached):
         if maxdepth is not None and maxdepth < 1:
             raise ValueError("maxdepth must be at least 1")
 
-        if isinstance(path, str):
+        if isinstance(path, (str, os.PathLike)):
             out = self.expand_path([path], recursive, maxdepth)
         else:
             out = set()
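Path expansion now also accepts a single path-like object, not just `str`, so bulk operations that go through it can take `pathlib.Path` input. A small sketch (path hypothetical):

    from pathlib import Path

    import fsspec

    fs = fsspec.filesystem("file")
    # expand_path (used by rm, copy, get, put) now accepts path-like input
    fs.expand_path(Path("/tmp/scratch"), recursive=True)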
@@ -1400,7 +1398,9 @@ class AbstractFileSystem(metaclass=_Cached):
         )
         return json.dumps(
             dict(
-                **{"cls": cls, "protocol": proto, "args": self.storage_args},
+                cls=cls,
+                protocol=proto,
+                args=self.storage_args,
                 **self.storage_options,
             )
         )
@@ -1691,6 +1691,8 @@ class AbstractBufferedFile(io.IOBase):
 
     def __eq__(self, other):
         """Files are equal if they have the same checksum, only in read mode"""
+        if self is other:
+            return True
         return self.mode == "rb" and other.mode == "rb" and hash(self) == hash(other)
 
     def commit(self):
@@ -128,7 +128,9 @@ class AbstractCopyTests:
 
         # Without recursive does nothing
         fs.cp(s, t)
-        assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
+        assert fs.ls(target, detail=False) == (
+            [] if supports_empty_directories else [dummy]
+        )
 
         # With recursive
         fs.cp(s, t, recursive=True)
@@ -155,7 +157,9 @@ class AbstractCopyTests:
         assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
 
         fs.rm(fs_join(target, "subdir"), recursive=True)
-        assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
+        assert fs.ls(target, detail=False) == (
+            [] if supports_empty_directories else [dummy]
+        )
 
         # Limit recursive by maxdepth
         fs.cp(s, t, recursive=True, maxdepth=1)
@@ -179,7 +183,9 @@ class AbstractCopyTests:
         assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
 
         fs.rm(fs_join(target, "subdir"), recursive=True)
-        assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
+        assert fs.ls(target, detail=False) == (
+            [] if supports_empty_directories else [dummy]
+        )
 
     def test_copy_directory_to_new_directory(
         self,
@@ -271,7 +277,9 @@ class AbstractCopyTests:
             ],
             recursive=True,
         )
-        assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
+        assert fs.ls(target, detail=False) == (
+            [] if supports_empty_directories else [dummy]
+        )
 
         # With recursive
         for glob, recursive in zip(["*", "**"], [True, False]):
@@ -290,7 +298,9 @@ class AbstractCopyTests:
             ],
             recursive=True,
         )
-        assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
+        assert fs.ls(target, detail=False) == (
+            [] if supports_empty_directories else [dummy]
+        )
 
         # Limit recursive by maxdepth
         fs.cp(
@@ -308,7 +318,9 @@ class AbstractCopyTests:
             ],
             recursive=True,
         )
-        assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
+        assert fs.ls(target, detail=False) == (
+            [] if supports_empty_directories else [dummy]
+        )
 
     def test_copy_glob_to_new_directory(
         self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
@@ -451,7 +463,9 @@ class AbstractCopyTests:
             ],
             recursive=True,
         )
-        assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
+        assert fs.ls(target, detail=False) == (
+            [] if supports_empty_directories else [dummy]
+        )
 
     def test_copy_list_of_files_to_new_directory(
         self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target