fsspec 2024.3.0__py3-none-any.whl → 2024.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. fsspec/__init__.py +2 -3
  2. fsspec/_version.py +14 -19
  3. fsspec/caching.py +83 -14
  4. fsspec/compression.py +1 -0
  5. fsspec/core.py +31 -6
  6. fsspec/exceptions.py +1 -0
  7. fsspec/generic.py +1 -1
  8. fsspec/gui.py +1 -1
  9. fsspec/implementations/arrow.py +0 -2
  10. fsspec/implementations/cache_mapper.py +1 -2
  11. fsspec/implementations/cache_metadata.py +7 -7
  12. fsspec/implementations/dirfs.py +2 -2
  13. fsspec/implementations/http.py +9 -9
  14. fsspec/implementations/local.py +97 -48
  15. fsspec/implementations/memory.py +9 -0
  16. fsspec/implementations/smb.py +3 -1
  17. fsspec/implementations/tests/__init__.py +0 -0
  18. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_file_listing.yaml +112 -0
  19. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_mkdir.yaml +582 -0
  20. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_pyarrow_non_partitioned.yaml +873 -0
  21. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range.yaml +458 -0
  22. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range_chunked.yaml +1355 -0
  23. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_and_read.yaml +795 -0
  24. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_pyarrow_non_partitioned.yaml +613 -0
  25. fsspec/implementations/tests/conftest.py +39 -0
  26. fsspec/implementations/tests/local/__init__.py +0 -0
  27. fsspec/implementations/tests/local/local_fixtures.py +18 -0
  28. fsspec/implementations/tests/local/local_test.py +14 -0
  29. fsspec/implementations/tests/memory/__init__.py +0 -0
  30. fsspec/implementations/tests/memory/memory_fixtures.py +27 -0
  31. fsspec/implementations/tests/memory/memory_test.py +14 -0
  32. fsspec/implementations/tests/out.zip +0 -0
  33. fsspec/implementations/tests/test_archive.py +382 -0
  34. fsspec/implementations/tests/test_arrow.py +259 -0
  35. fsspec/implementations/tests/test_cached.py +1306 -0
  36. fsspec/implementations/tests/test_common.py +35 -0
  37. fsspec/implementations/tests/test_dask.py +29 -0
  38. fsspec/implementations/tests/test_data.py +20 -0
  39. fsspec/implementations/tests/test_dbfs.py +268 -0
  40. fsspec/implementations/tests/test_dirfs.py +588 -0
  41. fsspec/implementations/tests/test_ftp.py +178 -0
  42. fsspec/implementations/tests/test_git.py +76 -0
  43. fsspec/implementations/tests/test_http.py +577 -0
  44. fsspec/implementations/tests/test_jupyter.py +57 -0
  45. fsspec/implementations/tests/test_libarchive.py +33 -0
  46. fsspec/implementations/tests/test_local.py +1285 -0
  47. fsspec/implementations/tests/test_memory.py +382 -0
  48. fsspec/implementations/tests/test_reference.py +720 -0
  49. fsspec/implementations/tests/test_sftp.py +233 -0
  50. fsspec/implementations/tests/test_smb.py +139 -0
  51. fsspec/implementations/tests/test_tar.py +243 -0
  52. fsspec/implementations/tests/test_webhdfs.py +197 -0
  53. fsspec/implementations/tests/test_zip.py +134 -0
  54. fsspec/implementations/webhdfs.py +1 -3
  55. fsspec/mapping.py +2 -2
  56. fsspec/parquet.py +0 -8
  57. fsspec/registry.py +4 -0
  58. fsspec/spec.py +21 -4
  59. fsspec/tests/__init__.py +0 -0
  60. fsspec/tests/abstract/mv.py +57 -0
  61. fsspec/tests/conftest.py +188 -0
  62. fsspec/tests/data/listing.html +1 -0
  63. fsspec/tests/test_api.py +498 -0
  64. fsspec/tests/test_async.py +230 -0
  65. fsspec/tests/test_caches.py +255 -0
  66. fsspec/tests/test_callbacks.py +89 -0
  67. fsspec/tests/test_compression.py +164 -0
  68. fsspec/tests/test_config.py +129 -0
  69. fsspec/tests/test_core.py +466 -0
  70. fsspec/tests/test_downstream.py +40 -0
  71. fsspec/tests/test_file.py +200 -0
  72. fsspec/tests/test_fuse.py +147 -0
  73. fsspec/tests/test_generic.py +90 -0
  74. fsspec/tests/test_gui.py +23 -0
  75. fsspec/tests/test_mapping.py +228 -0
  76. fsspec/tests/test_parquet.py +140 -0
  77. fsspec/tests/test_registry.py +134 -0
  78. fsspec/tests/test_spec.py +1167 -0
  79. fsspec/tests/test_utils.py +478 -0
  80. fsspec/utils.py +0 -2
  81. fsspec-2024.5.0.dist-info/METADATA +273 -0
  82. fsspec-2024.5.0.dist-info/RECORD +111 -0
  83. {fsspec-2024.3.0.dist-info → fsspec-2024.5.0.dist-info}/WHEEL +1 -2
  84. fsspec-2024.3.0.dist-info/METADATA +0 -167
  85. fsspec-2024.3.0.dist-info/RECORD +0 -54
  86. fsspec-2024.3.0.dist-info/top_level.txt +0 -1
  87. {fsspec-2024.3.0.dist-info → fsspec-2024.5.0.dist-info/licenses}/LICENSE +0 -0
fsspec/__init__.py CHANGED
@@ -1,6 +1,7 @@
 from importlib.metadata import entry_points
 
-from . import _version, caching
+from . import caching
+from ._version import __version__  # noqa: F401
 from .callbacks import Callback
 from .compression import available_compressions
 from .core import get_fs_token_paths, open, open_files, open_local, url_to_fs
@@ -15,8 +16,6 @@ from .registry import (
 )
 from .spec import AbstractFileSystem
 
-__version__ = _version.get_versions()["version"]
-
 __all__ = [
     "AbstractFileSystem",
     "FSTimeoutError",
fsspec/_version.py CHANGED
@@ -1,21 +1,16 @@
+# file generated by setuptools_scm
+# don't change, don't track in version control
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple, Union
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+else:
+    VERSION_TUPLE = object
 
-# This file was generated by 'versioneer.py' (0.29) from
-# revision-control system data, or from the parent directory name of an
-# unpacked source archive. Distribution tarballs contain a pre-generated copy
-# of this file.
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
 
-import json
-
-version_json = '''
-{
- "date": "2024-03-15T16:14:56-0400",
- "dirty": false,
- "error": null,
- "full-revisionid": "4bd16f64993af6092753d16df49298a19b58ce96",
- "version": "2024.3.0"
-}
-'''  # END VERSION_JSON
-
-
-def get_versions():
-    return json.loads(version_json)
+__version__ = version = '2024.5.0'
+__version_tuple__ = version_tuple = (2024, 5, 0)
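Note: with the switch from versioneer to setuptools_scm, version metadata becomes plain module attributes instead of JSON parsed at import time. A quick illustrative sketch (not part of the diff) of how the two spellings resolve once 2024.5.0 is installed:

```python
import fsspec
from fsspec._version import version_tuple

print(fsspec.__version__)  # "2024.5.0", re-exported by fsspec/__init__.py above
print(version_tuple)       # (2024, 5, 0)
```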
fsspec/caching.py CHANGED
@@ -56,8 +56,13 @@ class BaseCache:
 
     def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
         self.blocksize = blocksize
+        self.nblocks = 0
         self.fetcher = fetcher
         self.size = size
+        self.hit_count = 0
+        self.miss_count = 0
+        # the bytes that we actually requested
+        self.total_requested_bytes = 0
 
     def _fetch(self, start: int | None, stop: int | None) -> bytes:
         if start is None:
@@ -68,6 +73,36 @@ class BaseCache:
             return b""
         return self.fetcher(start, stop)
 
+    def _reset_stats(self) -> None:
+        """Reset hit and miss counts for a more ganular report e.g. by file."""
+        self.hit_count = 0
+        self.miss_count = 0
+        self.total_requested_bytes = 0
+
+    def _log_stats(self) -> str:
+        """Return a formatted string of the cache statistics."""
+        if self.hit_count == 0 and self.miss_count == 0:
+            # a cache that does nothing, this is for logs only
+            return ""
+        return " , %s: %d hits, %d misses, %d total requested bytes" % (
+            self.name,
+            self.hit_count,
+            self.miss_count,
+            self.total_requested_bytes,
+        )
+
+    def __repr__(self) -> str:
+        # TODO: use rich for better formatting
+        return f"""
+        <{self.__class__.__name__}:
+            block size  : {self.blocksize}
+            block count : {self.nblocks}
+            file size   : {self.size}
+            cache hits  : {self.hit_count}
+            cache misses: {self.miss_count}
+            total requested bytes: {self.total_requested_bytes}>
+        """
+
 
 class MMapCache(BaseCache):
     """memory-mapped sparse file cache
@@ -126,13 +161,18 @@ class MMapCache(BaseCache):
         start_block = start // self.blocksize
         end_block = end // self.blocksize
         need = [i for i in range(start_block, end_block + 1) if i not in self.blocks]
+        hits = [i for i in range(start_block, end_block + 1) if i in self.blocks]
+        self.miss_count += len(need)
+        self.hit_count += len(hits)
         while need:
             # TODO: not a for loop so we can consolidate blocks later to
             # make fewer fetch calls; this could be parallel
             i = need.pop(0)
+
             sstart = i * self.blocksize
             send = min(sstart + self.blocksize, self.size)
-            logger.debug(f"MMap get block #{i} ({sstart}-{send}")
+            self.total_requested_bytes += send - sstart
+            logger.debug(f"MMap get block #{i} ({sstart}-{send})")
             self.cache[sstart:send] = self.fetcher(sstart, send)
             self.blocks.add(i)
 
@@ -176,16 +216,20 @@ class ReadAheadCache(BaseCache):
         l = end - start
         if start >= self.start and end <= self.end:
             # cache hit
+            self.hit_count += 1
             return self.cache[start - self.start : end - self.start]
         elif self.start <= start < self.end:
             # partial hit
+            self.miss_count += 1
             part = self.cache[start - self.start :]
             l -= len(part)
             start = self.end
         else:
             # miss
+            self.miss_count += 1
             part = b""
         end = min(self.size, end + self.blocksize)
+        self.total_requested_bytes += end - start
         self.cache = self.fetcher(start, end)  # new block replaces old
         self.start = start
         self.end = self.start + len(self.cache)
@@ -202,24 +246,39 @@ class FirstChunkCache(BaseCache):
     name = "first"
 
     def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
+        if blocksize > size:
+            # this will buffer the whole thing
+            blocksize = size
         super().__init__(blocksize, fetcher, size)
         self.cache: bytes | None = None
 
     def _fetch(self, start: int | None, end: int | None) -> bytes:
         start = start or 0
-        end = end or self.size
+        if start > self.size:
+            logger.debug("FirstChunkCache: requested start > file size")
+            return b""
+
+        end = min(end, self.size)
+
         if start < self.blocksize:
             if self.cache is None:
+                self.miss_count += 1
                 if end > self.blocksize:
+                    self.total_requested_bytes += end
                     data = self.fetcher(0, end)
                     self.cache = data[: self.blocksize]
                     return data[start:]
                 self.cache = self.fetcher(0, self.blocksize)
+                self.total_requested_bytes += self.blocksize
             part = self.cache[start:end]
             if end > self.blocksize:
+                self.total_requested_bytes += end - self.blocksize
                 part += self.fetcher(self.blocksize, end)
+            self.hit_count += 1
             return part
         else:
+            self.miss_count += 1
+            self.total_requested_bytes += end - start
             return self.fetcher(start, end)
 
 
@@ -256,12 +315,6 @@ class BlockCache(BaseCache):
         self.maxblocks = maxblocks
         self._fetch_block_cached = functools.lru_cache(maxblocks)(self._fetch_block)
 
-    def __repr__(self) -> str:
-        return (
-            f"<BlockCache blocksize={self.blocksize}, "
-            f"size={self.size}, nblocks={self.nblocks}>"
-        )
-
     def cache_info(self):
         """
         The statistics on the block cache.
@@ -319,6 +372,8 @@
 
         start = block_number * self.blocksize
         end = start + self.blocksize
+        self.total_requested_bytes += end - start
+        self.miss_count += 1
         logger.info("BlockCache fetching block %d", block_number)
         block_contents = super()._fetch(start, end)
         return block_contents
@@ -339,6 +394,7 @@
         start_pos = start % self.blocksize
         end_pos = end % self.blocksize
 
+        self.hit_count += 1
         if start_block_number == end_block_number:
             block: bytes = self._fetch_block_cached(start_block_number)
             return block[start_pos:end_pos]
@@ -404,6 +460,7 @@ class BytesCache(BaseCache):
         ):
             # cache hit: we have all the required data
             offset = start - self.start
+            self.hit_count += 1
             return self.cache[offset : offset + end - start]
 
         if self.blocksize:
@@ -418,17 +475,22 @@
             self.end is None or end > self.end
         ):
             # First read, or extending both before and after
+            self.total_requested_bytes += bend - start
+            self.miss_count += 1
             self.cache = self.fetcher(start, bend)
             self.start = start
         else:
             assert self.start is not None
             assert self.end is not None
+            self.miss_count += 1
 
             if start < self.start:
                 if self.end is None or self.end - end > self.blocksize:
+                    self.total_requested_bytes += bend - start
                    self.cache = self.fetcher(start, bend)
                    self.start = start
                else:
+                    self.total_requested_bytes += self.start - start
                    new = self.fetcher(start, self.start)
                    self.start = start
                    self.cache = new + self.cache
@@ -436,9 +498,11 @@
             if self.end > self.size:
                 pass
             elif end - self.end > self.blocksize:
+                self.total_requested_bytes += bend - start
                 self.cache = self.fetcher(start, bend)
                 self.start = start
             else:
+                self.total_requested_bytes += bend - self.end
                 new = self.fetcher(self.end, bend)
                 self.cache = self.cache + new
 
@@ -470,10 +534,13 @@ class AllBytes(BaseCache):
     ) -> None:
         super().__init__(blocksize, fetcher, size)  # type: ignore[arg-type]
         if data is None:
+            self.miss_count += 1
+            self.total_requested_bytes += self.size
            data = self.fetcher(0, self.size)
         self.data = data
 
     def _fetch(self, start: int | None, stop: int | None) -> bytes:
+        self.hit_count += 1
         return self.data[start:stop]
 
 
@@ -551,6 +618,7 @@ class KnownPartsOfAFile(BaseCache):
                     # are allowed to pad reads beyond the
                     # buffer with zero
                     out += b"\x00" * (stop - start - len(out))
+                    self.hit_count += 1
                     return out
                 else:
                     # The request ends outside a known range,
@@ -572,6 +640,8 @@
             f"IO/caching performance may be poor!"
         )
         logger.debug(f"KnownPartsOfAFile cache fetching {start}-{stop}")
+        self.total_requested_bytes += stop - start
+        self.miss_count += 1
         return out + super()._fetch(start, stop)
 
 
@@ -676,12 +746,6 @@ class BackgroundBlockCache(BaseCache):
         self._fetch_future: Future[bytes] | None = None
         self._fetch_future_lock = threading.Lock()
 
-    def __repr__(self) -> str:
-        return (
-            f"<BackgroundBlockCache blocksize={self.blocksize}, "
-            f"size={self.size}, nblocks={self.nblocks}>"
-        )
-
     def cache_info(self) -> UpdatableLRU.CacheInfo:
         """
         The statistics on the block cache.
@@ -799,6 +863,8 @@
         start = block_number * self.blocksize
         end = start + self.blocksize
         logger.info("BlockCache fetching block (%s) %d", log_info, block_number)
+        self.total_requested_bytes += end - start
+        self.miss_count += 1
         block_contents = super()._fetch(start, end)
         return block_contents
 
@@ -818,6 +884,9 @@
         start_pos = start % self.blocksize
         end_pos = end % self.blocksize
 
+        # kind of pointless to count this as a hit, but it is
+        self.hit_count += 1
+
         if start_block_number == end_block_number:
             block = self._fetch_block_cached(start_block_number)
             return block[start_pos:end_pos]
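Note: every `BaseCache` subclass now tracks `hit_count`, `miss_count`, and `total_requested_bytes`, and the shared `__repr__` reports them. A minimal sketch exercising the counters on a `ReadAheadCache`; the in-memory `data`/`fetcher` pair here is a stand-in for a real remote byte-range reader:

```python
from fsspec.caching import ReadAheadCache

data = b"x" * 1_000_000

def fetcher(start: int, end: int) -> bytes:
    # pretend this is an expensive remote range request
    return data[start:end]

cache = ReadAheadCache(blocksize=64 * 1024, fetcher=fetcher, size=len(data))
cache._fetch(0, 1024)     # miss: nothing cached yet, pulls one readahead block
cache._fetch(1024, 2048)  # hit: served from the block fetched above
print(cache.hit_count, cache.miss_count, cache.total_requested_bytes)
print(repr(cache))        # the new BaseCache.__repr__ includes these statistics
```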
fsspec/compression.py CHANGED
@@ -1,4 +1,5 @@
 """Helper functions for a standard streaming compression API"""
+
 from zipfile import ZipFile
 
 import fsspec.utils
fsspec/core.py CHANGED
@@ -8,7 +8,7 @@ from glob import has_magic
 from pathlib import Path
 
 # for backwards compat, we export cache things from here too
-from .caching import (  # noqa: F401
+from fsspec.caching import (  # noqa: F401
     BaseCache,
     BlockCache,
     BytesCache,
@@ -16,9 +16,10 @@ from .caching import (  # noqa: F401
     ReadAheadCache,
     caches,
 )
-from .compression import compr
-from .registry import filesystem, get_filesystem_class
-from .utils import (
+from fsspec.compression import compr
+from fsspec.config import conf
+from fsspec.registry import filesystem, get_filesystem_class
+from fsspec.utils import (
     _unstrip_protocol,
     build_name_function,
     infer_compression,
@@ -100,7 +101,18 @@ class OpenFile:
     def __enter__(self):
         mode = self.mode.replace("t", "").replace("b", "") + "b"
 
-        f = self.fs.open(self.path, mode=mode)
+        try:
+            f = self.fs.open(self.path, mode=mode)
+        except FileNotFoundError as e:
+            if has_magic(self.path):
+                raise FileNotFoundError(
+                    "%s not found. The URL contains glob characters: you maybe needed\n"
+                    "to pass expand=True in fsspec.open() or the storage_options of \n"
+                    "your library. You can also set the config value 'open_expand'\n"
+                    "before import, or fsspec.core.DEFAULT_EXPAND at runtime, to True.",
+                    self.path,
+                ) from e
+            raise
 
         self.fobjects = [f]
 
@@ -367,6 +379,7 @@ def url_to_fs(url, **kwargs):
     urlpath : str
         The file-systems-specific URL for ``url``.
     """
+    url = stringify_path(url)
     # non-FS arguments that appear in fsspec.open()
     # inspect could keep this in sync with open()'s signature
     known_kwargs = {
@@ -396,6 +409,9 @@
     return fs, urlpath
 
 
+DEFAULT_EXPAND = conf.get("open_expand", False)
+
+
 def open(
     urlpath,
     mode="rb",
@@ -404,6 +420,7 @@
     errors=None,
     protocol=None,
     newline=None,
+    expand=None,
     **kwargs,
 ):
     """Given a path or paths, return one ``OpenFile`` object.
@@ -428,6 +445,13 @@
     newline: bytes or None
         Used for line terminator in text mode. If None, uses system default;
         if blank, uses no translation.
+    expand: bool or Nonw
+        Whether to regard file paths containing special glob characters as needing
+        expansion (finding the first match) or absolute. Setting False allows using
+        paths which do embed such characters. If None (default), this argument
+        takes its value from the DEFAULT_EXPAND module variable, which takes
+        its initial value from the "open_expand" config value at startup, which will
+        be False if not set.
     **kwargs: dict
         Extra options that make sense to a particular storage connection, e.g.
         host, port, username, password, etc.
@@ -456,6 +480,7 @@
     - For implementations in separate packages see
       https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
     """
+    expand = DEFAULT_EXPAND if expand is None else expand
     out = open_files(
         urlpath=[urlpath],
         mode=mode,
@@ -464,7 +489,7 @@
         errors=errors,
         protocol=protocol,
         newline=newline,
-        expand=False,
+        expand=expand,
         **kwargs,
     )
     if not out:
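Note: `fsspec.open()` gains an `expand=` keyword whose default comes from the new module-level `DEFAULT_EXPAND` (seeded from the `open_expand` config value), and a glob path that fails to open now raises an error pointing at exactly this option. A hedged sketch of the new behaviour, using the memory filesystem purely for demonstration:

```python
import fsspec

m = fsspec.filesystem("memory")
m.pipe("/part-0.csv", b"a,b\n1,2\n")

# expand=True resolves the glob and opens the first match; with the old
# hard-coded expand=False, open() treated the path literally and failed.
with fsspec.open("memory://part-*.csv", "rt", expand=True) as f:
    print(f.read())

# Or flip the process-wide default at runtime, per the docstring above:
fsspec.core.DEFAULT_EXPAND = True
```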
fsspec/exceptions.py CHANGED
@@ -1,6 +1,7 @@
 """
 fsspec user-defined exception classes
 """
+
 import asyncio
 
 
fsspec/generic.py CHANGED
@@ -139,7 +139,7 @@ def rsync(
     source_files, target_files = zip(*allfiles.items())
     fs.cp(source_files, target_files, **kwargs)
     logger.debug(f"{len(to_delete)} files to delete")
-    if delete_missing:
+    if delete_missing and to_delete:
         fs.rm(to_delete)
     return allfiles
 
fsspec/gui.py CHANGED
@@ -94,7 +94,7 @@ class SigSlot:
         try:
             return self.panel._repr_mimebundle_(*args, **kwargs)
         except (ValueError, AttributeError):
-            raise NotImplementedError("Panel does not seem to be set " "up properly")
+            raise NotImplementedError("Panel does not seem to be set up properly")
 
     def connect(self, signal, slot):
         """Associate call back with given event
fsspec/implementations/arrow.py CHANGED
@@ -139,8 +139,6 @@ class ArrowFSWrapper(AbstractFileSystem):
         path2 = self._strip_protocol(path2).rstrip("/")
         self.fs.move(path1, path2)
 
-    mv_file = mv
-
     @wrap_exceptions
     def rm_file(self, path):
         path = self._strip_protocol(path)
fsspec/implementations/cache_mapper.py CHANGED
@@ -12,8 +12,7 @@ class AbstractCacheMapper(abc.ABC):
     """
 
     @abc.abstractmethod
-    def __call__(self, path: str) -> str:
-        ...
+    def __call__(self, path: str) -> str: ...
 
     def __eq__(self, other: object) -> bool:
         # Identity only depends on class. When derived classes have attributes
fsspec/implementations/cache_metadata.py CHANGED
@@ -57,10 +57,14 @@ class CacheMetadata:
         """Low-level function to load metadata from specific file"""
         try:
             with open(fn, "r") as f:
-                return json.load(f)
+                loaded = json.load(f)
         except ValueError:
             with open(fn, "rb") as f:
-                return pickle.load(f)
+                loaded = pickle.load(f)
+        for c in loaded.values():
+            if isinstance(c.get("blocks"), list):
+                c["blocks"] = set(c["blocks"])
+        return loaded
 
     def _save(self, metadata_to_save: Detail, fn: str) -> None:
         """Low-level function to save metadata to specific file"""
@@ -152,11 +156,7 @@ class CacheMetadata:
         for fn, _, _ in self._scan_locations():
             if os.path.exists(fn):
                 # TODO: consolidate blocks here
-                loaded_cached_files = self._load(fn)
-                for c in loaded_cached_files.values():
-                    if isinstance(c["blocks"], list):
-                        c["blocks"] = set(c["blocks"])
-                cached_files.append(loaded_cached_files)
+                cached_files.append(self._load(fn))
             else:
                 cached_files.append({})
         self.cached_files = cached_files or [{}]
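Note: the list-to-set normalization of `blocks` moves into `_load` so that every load path rehydrates it; JSON has no set type, so the set of cached block numbers round-trips as a list. A standalone sketch of the same round trip (the metadata dict is illustrative, not the full cache schema):

```python
import json

# a set of cached block numbers survives JSON only as a list ...
saved = json.dumps({"/file.bin": {"blocks": sorted({0, 1, 2})}})

# ... so it is rehydrated on load, mirroring CacheMetadata._load above
loaded = json.loads(saved)
for c in loaded.values():
    if isinstance(c.get("blocks"), list):
        c["blocks"] = set(c["blocks"])

assert loaded["/file.bin"]["blocks"] == {0, 1, 2}
```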
fsspec/implementations/dirfs.py CHANGED
@@ -329,8 +329,8 @@ class DirFileSystem(AsyncFileSystem):
     def rmdir(self, path):
         return self.fs.rmdir(self._join(path))
 
-    def mv_file(self, path1, path2, **kwargs):
-        return self.fs.mv_file(
+    def mv(self, path1, path2, **kwargs):
+        return self.fs.mv(
             self._join(path1),
             self._join(path2),
             **kwargs,
fsspec/implementations/http.py CHANGED
@@ -451,7 +451,7 @@ class HTTPFileSystem(AsyncFileSystem):
 
         ends_with_slash = path.endswith("/")  # _strip_protocol strips trailing slash
         path = self._strip_protocol(path)
-        append_slash_to_dirname = ends_with_slash or path.endswith("/**")
+        append_slash_to_dirname = ends_with_slash or path.endswith(("/**", "/*"))
         idx_star = path.find("*") if path.find("*") >= 0 else len(path)
         idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
 
@@ -494,15 +494,15 @@ class HTTPFileSystem(AsyncFileSystem):
         pattern = re.compile(pattern)
 
         out = {
-            p: info
+            (
+                p.rstrip("/")
+                if not append_slash_to_dirname
+                and info["type"] == "directory"
+                and p.endswith("/")
+                else p
+            ): info
             for p, info in sorted(allpaths.items())
-            if pattern.match(
-                (
-                    p + "/"
-                    if append_slash_to_dirname and info["type"] == "directory"
-                    else p
-                )
-            )
+            if pattern.match(p.rstrip("/"))
         }
 
         if detail: