fsspec 2024.3.1__py3-none-any.whl → 2024.6.0__py3-none-any.whl

fsspec/__init__.py CHANGED
@@ -1,6 +1,7 @@
 from importlib.metadata import entry_points

-from . import _version, caching
+from . import caching
+from ._version import __version__  # noqa: F401
 from .callbacks import Callback
 from .compression import available_compressions
 from .core import get_fs_token_paths, open, open_files, open_local, url_to_fs
@@ -15,8 +16,6 @@ from .registry import (
 )
 from .spec import AbstractFileSystem

-__version__ = _version.get_versions()["version"]
-
 __all__ = [
     "AbstractFileSystem",
     "FSTimeoutError",
fsspec/_version.py CHANGED
@@ -1,21 +1,16 @@
+# file generated by setuptools_scm
+# don't change, don't track in version control
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple, Union
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+else:
+    VERSION_TUPLE = object

-# This file was generated by 'versioneer.py' (0.29) from
-# revision-control system data, or from the parent directory name of an
-# unpacked source archive. Distribution tarballs contain a pre-generated copy
-# of this file.
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE

-import json
-
-version_json = '''
-{
- "date": "2024-03-18T15:33:58-0400",
- "dirty": false,
- "error": null,
- "full-revisionid": "47b445ae4c284a82dd15e0287b1ffc410e8fc470",
- "version": "2024.3.1"
-}
-'''  # END VERSION_JSON
-
-
-def get_versions():
-    return json.loads(version_json)
+__version__ = version = '2024.6.0'
+__version_tuple__ = version_tuple = (2024, 6, 0)
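With versioneer replaced by setuptools_scm, the version is now a pair of static attributes instead of JSON parsed at import time. A quick sanity check of the new layout (a sketch, assuming fsspec 2024.6.0 is the installed version):

```python
import fsspec
from fsspec._version import __version_tuple__

# __version__ is now imported into the package namespace from the
# generated _version module, rather than computed by get_versions().
print(fsspec.__version__)   # "2024.6.0"
print(__version_tuple__)    # (2024, 6, 0)
```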
fsspec/caching.py CHANGED
@@ -15,6 +15,7 @@ from typing import (
     ClassVar,
     Generic,
     NamedTuple,
+    Optional,
     OrderedDict,
     TypeVar,
 )
@@ -56,8 +57,13 @@ class BaseCache:

     def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
         self.blocksize = blocksize
+        self.nblocks = 0
         self.fetcher = fetcher
         self.size = size
+        self.hit_count = 0
+        self.miss_count = 0
+        # the bytes that we actually requested
+        self.total_requested_bytes = 0

     def _fetch(self, start: int | None, stop: int | None) -> bytes:
         if start is None:
@@ -68,6 +74,36 @@
             return b""
         return self.fetcher(start, stop)

+    def _reset_stats(self) -> None:
+        """Reset hit and miss counts for a more granular report, e.g. by file."""
+        self.hit_count = 0
+        self.miss_count = 0
+        self.total_requested_bytes = 0
+
+    def _log_stats(self) -> str:
+        """Return a formatted string of the cache statistics."""
+        if self.hit_count == 0 and self.miss_count == 0:
+            # a cache that does nothing, this is for logs only
+            return ""
+        return " , %s: %d hits, %d misses, %d total requested bytes" % (
+            self.name,
+            self.hit_count,
+            self.miss_count,
+            self.total_requested_bytes,
+        )
+
+    def __repr__(self) -> str:
+        # TODO: use rich for better formatting
+        return f"""
+        <{self.__class__.__name__}:
+            block size  :   {self.blocksize}
+            block count :   {self.nblocks}
+            file size   :   {self.size}
+            cache hits  :   {self.hit_count}
+            cache misses:   {self.miss_count}
+            total requested bytes: {self.total_requested_bytes}>
+        """
+

 class MMapCache(BaseCache):
     """memory-mapped sparse file cache
@@ -126,13 +162,18 @@ class MMapCache(BaseCache):
         start_block = start // self.blocksize
         end_block = end // self.blocksize
         need = [i for i in range(start_block, end_block + 1) if i not in self.blocks]
+        hits = [i for i in range(start_block, end_block + 1) if i in self.blocks]
+        self.miss_count += len(need)
+        self.hit_count += len(hits)
         while need:
             # TODO: not a for loop so we can consolidate blocks later to
             # make fewer fetch calls; this could be parallel
             i = need.pop(0)
+
             sstart = i * self.blocksize
             send = min(sstart + self.blocksize, self.size)
-            logger.debug(f"MMap get block #{i} ({sstart}-{send}")
+            self.total_requested_bytes += send - sstart
+            logger.debug(f"MMap get block #{i} ({sstart}-{send})")
             self.cache[sstart:send] = self.fetcher(sstart, send)
             self.blocks.add(i)

@@ -176,16 +217,20 @@ class ReadAheadCache(BaseCache):
         l = end - start
         if start >= self.start and end <= self.end:
             # cache hit
+            self.hit_count += 1
             return self.cache[start - self.start : end - self.start]
         elif self.start <= start < self.end:
             # partial hit
+            self.miss_count += 1
             part = self.cache[start - self.start :]
             l -= len(part)
             start = self.end
         else:
             # miss
+            self.miss_count += 1
             part = b""
             end = min(self.size, end + self.blocksize)
+        self.total_requested_bytes += end - start
         self.cache = self.fetcher(start, end)  # new block replaces old
         self.start = start
         self.end = self.start + len(self.cache)
@@ -202,24 +247,39 @@ class FirstChunkCache(BaseCache):
     name = "first"

     def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
+        if blocksize > size:
+            # this will buffer the whole thing
+            blocksize = size
         super().__init__(blocksize, fetcher, size)
         self.cache: bytes | None = None

     def _fetch(self, start: int | None, end: int | None) -> bytes:
         start = start or 0
-        end = end or self.size
+        if start > self.size:
+            logger.debug("FirstChunkCache: requested start > file size")
+            return b""
+
+        end = min(end, self.size)
+
         if start < self.blocksize:
             if self.cache is None:
+                self.miss_count += 1
                 if end > self.blocksize:
+                    self.total_requested_bytes += end
                     data = self.fetcher(0, end)
                     self.cache = data[: self.blocksize]
                     return data[start:]
                 self.cache = self.fetcher(0, self.blocksize)
+                self.total_requested_bytes += self.blocksize
             part = self.cache[start:end]
             if end > self.blocksize:
+                self.total_requested_bytes += end - self.blocksize
                 part += self.fetcher(self.blocksize, end)
+            self.hit_count += 1
             return part
         else:
+            self.miss_count += 1
+            self.total_requested_bytes += end - start
             return self.fetcher(start, end)


@@ -256,12 +316,6 @@ class BlockCache(BaseCache):
         self.maxblocks = maxblocks
         self._fetch_block_cached = functools.lru_cache(maxblocks)(self._fetch_block)

-    def __repr__(self) -> str:
-        return (
-            f"<BlockCache blocksize={self.blocksize}, "
-            f"size={self.size}, nblocks={self.nblocks}>"
-        )
-
     def cache_info(self):
         """
         The statistics on the block cache.
@@ -319,6 +373,8 @@

         start = block_number * self.blocksize
         end = start + self.blocksize
+        self.total_requested_bytes += end - start
+        self.miss_count += 1
         logger.info("BlockCache fetching block %d", block_number)
         block_contents = super()._fetch(start, end)
         return block_contents
@@ -339,6 +395,7 @@
         start_pos = start % self.blocksize
         end_pos = end % self.blocksize

+        self.hit_count += 1
         if start_block_number == end_block_number:
             block: bytes = self._fetch_block_cached(start_block_number)
             return block[start_pos:end_pos]
@@ -404,6 +461,7 @@ class BytesCache(BaseCache):
         ):
             # cache hit: we have all the required data
             offset = start - self.start
+            self.hit_count += 1
             return self.cache[offset : offset + end - start]

         if self.blocksize:
@@ -418,17 +476,22 @@
             self.end is None or end > self.end
         ):
             # First read, or extending both before and after
+            self.total_requested_bytes += bend - start
+            self.miss_count += 1
             self.cache = self.fetcher(start, bend)
             self.start = start
         else:
             assert self.start is not None
             assert self.end is not None
+            self.miss_count += 1

             if start < self.start:
                 if self.end is None or self.end - end > self.blocksize:
+                    self.total_requested_bytes += bend - start
                     self.cache = self.fetcher(start, bend)
                     self.start = start
                 else:
+                    self.total_requested_bytes += self.start - start
                     new = self.fetcher(start, self.start)
                     self.start = start
                     self.cache = new + self.cache
@@ -436,9 +499,11 @@
             if self.end > self.size:
                 pass
             elif end - self.end > self.blocksize:
+                self.total_requested_bytes += bend - start
                 self.cache = self.fetcher(start, bend)
                 self.start = start
             else:
+                self.total_requested_bytes += bend - self.end
                 new = self.fetcher(self.end, bend)
                 self.cache = self.cache + new

@@ -470,10 +535,13 @@ class AllBytes(BaseCache):
     ) -> None:
         super().__init__(blocksize, fetcher, size)  # type: ignore[arg-type]
         if data is None:
+            self.miss_count += 1
+            self.total_requested_bytes += self.size
             data = self.fetcher(0, self.size)
         self.data = data

     def _fetch(self, start: int | None, stop: int | None) -> bytes:
+        self.hit_count += 1
         return self.data[start:stop]


@@ -507,7 +575,7 @@ class KnownPartsOfAFile(BaseCache):
         blocksize: int,
         fetcher: Fetcher,
         size: int,
-        data: dict[tuple[int, int], bytes] = {},
+        data: Optional[dict[tuple[int, int], bytes]] = None,
         strict: bool = True,
         **_: Any,
     ):
@@ -530,7 +598,7 @@

             self.data = dict(zip(offsets, blocks))
         else:
-            self.data = data
+            self.data = {}

     def _fetch(self, start: int | None, stop: int | None) -> bytes:
         if start is None:
@@ -551,6 +619,7 @@
                     # are allowed to pad reads beyond the
                     # buffer with zero
                     out += b"\x00" * (stop - start - len(out))
+                    self.hit_count += 1
                     return out
                 else:
                     # The request ends outside a known range,
@@ -572,6 +641,8 @@
                 f"IO/caching performance may be poor!"
             )
         logger.debug(f"KnownPartsOfAFile cache fetching {start}-{stop}")
+        self.total_requested_bytes += stop - start
+        self.miss_count += 1
         return out + super()._fetch(start, stop)


@@ -676,12 +747,6 @@ class BackgroundBlockCache(BaseCache):
         self._fetch_future: Future[bytes] | None = None
         self._fetch_future_lock = threading.Lock()

-    def __repr__(self) -> str:
-        return (
-            f"<BackgroundBlockCache blocksize={self.blocksize}, "
-            f"size={self.size}, nblocks={self.nblocks}>"
-        )
-
     def cache_info(self) -> UpdatableLRU.CacheInfo:
         """
         The statistics on the block cache.
@@ -799,6 +864,8 @@
         start = block_number * self.blocksize
         end = start + self.blocksize
         logger.info("BlockCache fetching block (%s) %d", log_info, block_number)
+        self.total_requested_bytes += end - start
+        self.miss_count += 1
         block_contents = super()._fetch(start, end)
         return block_contents

@@ -818,6 +885,9 @@
         start_pos = start % self.blocksize
         end_pos = end % self.blocksize

+        # kind of pointless to count this as a hit, but it is
+        self.hit_count += 1
+
         if start_block_number == end_block_number:
             block = self._fetch_block_cached(start_block_number)
             return block[start_pos:end_pos]
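The `hit_count`, `miss_count` and `total_requested_bytes` counters added to `BaseCache` are maintained by every cache class above. A minimal sketch of reading them back, using `ReadAheadCache` over an in-memory fetcher (`_fetch` and `_reset_stats` are internal methods, called here purely for illustration):

```python
from fsspec.caching import ReadAheadCache

data = b"x" * 10_000

def fetcher(start, end):
    # stands in for a real backend read, e.g. an HTTP range request
    return data[start:end]

cache = ReadAheadCache(blocksize=1024, fetcher=fetcher, size=len(data))
cache._fetch(0, 100)   # miss: nothing cached yet, reads ahead one block
cache._fetch(50, 150)  # hit: inside the read-ahead window
print(cache.hit_count, cache.miss_count, cache.total_requested_bytes)  # 1 1 1124
print(cache)           # the new BaseCache.__repr__ statistics summary
cache._reset_stats()   # zero the counters, e.g. between files
```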
fsspec/compression.py CHANGED
@@ -1,4 +1,5 @@
 """Helper functions for a standard streaming compression API"""
+
 from zipfile import ZipFile

 import fsspec.utils
@@ -138,7 +139,7 @@ class SnappyFile(AbstractBufferedFile):
 try:
     import snappy

-    snappy.compress
+    snappy.compress(b"")
     # Snappy may use the .sz file extension, but this is not part of the
     # standard implementation.
     register_compression("snappy", SnappyFile, [])
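The snappy probe now calls `snappy.compress(b"")` instead of merely referencing the attribute, so registration is skipped when the installed `snappy` module is not a working python-snappy. The same try-call-register pattern applies to any optional codec; a hedged sketch using the `lz4` package (the codec name `lz4-probed` is invented for illustration; fsspec already registers lz4 under its usual name):

```python
from fsspec.compression import register_compression

try:
    import lz4.frame

    # Call the codec rather than just touching the attribute: this fails
    # loudly if an unrelated module happens to shadow the expected package.
    lz4.frame.compress(b"")
except Exception:
    pass  # codec unavailable or broken: leave it unregistered
else:
    register_compression("lz4-probed", lz4.frame.open, [])
```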
fsspec/core.py CHANGED
@@ -8,7 +8,7 @@ from glob import has_magic
 from pathlib import Path

 # for backwards compat, we export cache things from here too
-from .caching import (  # noqa: F401
+from fsspec.caching import (  # noqa: F401
     BaseCache,
     BlockCache,
     BytesCache,
@@ -16,9 +16,10 @@ from fsspec.caching import (  # noqa: F401
     ReadAheadCache,
     caches,
 )
-from .compression import compr
-from .registry import filesystem, get_filesystem_class
-from .utils import (
+from fsspec.compression import compr
+from fsspec.config import conf
+from fsspec.registry import filesystem, get_filesystem_class
+from fsspec.utils import (
     _unstrip_protocol,
     build_name_function,
     infer_compression,
@@ -100,7 +101,18 @@ class OpenFile:
     def __enter__(self):
         mode = self.mode.replace("t", "").replace("b", "") + "b"

-        f = self.fs.open(self.path, mode=mode)
+        try:
+            f = self.fs.open(self.path, mode=mode)
+        except FileNotFoundError as e:
+            if has_magic(self.path):
+                raise FileNotFoundError(
+                    "%s not found. The URL contains glob characters: you may have needed\n"
+                    "to pass expand=True in fsspec.open() or the storage_options of\n"
+                    "your library. You can also set the config value 'open_expand'\n"
+                    "before import, or fsspec.core.DEFAULT_EXPAND at runtime, to True.",
+                    self.path,
+                ) from e
+            raise

         self.fobjects = [f]

@@ -367,6 +379,7 @@ def url_to_fs(url, **kwargs):
     urlpath : str
         The file-systems-specific URL for ``url``.
     """
+    url = stringify_path(url)
     # non-FS arguments that appear in fsspec.open()
     # inspect could keep this in sync with open()'s signature
     known_kwargs = {
@@ -396,6 +409,9 @@
     return fs, urlpath


+DEFAULT_EXPAND = conf.get("open_expand", False)
+
+
 def open(
     urlpath,
     mode="rb",
@@ -404,6 +420,7 @@
     errors=None,
     protocol=None,
     newline=None,
+    expand=None,
     **kwargs,
 ):
     """Given a path or paths, return one ``OpenFile`` object.
@@ -428,6 +445,13 @@
     newline: bytes or None
         Used for line terminator in text mode. If None, uses system default;
        if blank, uses no translation.
+    expand: bool or None
+        Whether to regard file paths containing special glob characters as needing
+        expansion (finding the first match) or absolute. Setting False allows using
+        paths which do embed such characters. If None (default), this argument
+        takes its value from the DEFAULT_EXPAND module variable, which takes
+        its initial value from the "open_expand" config value at startup, which will
+        be False if not set.
     **kwargs: dict
         Extra options that make sense to a particular storage connection, e.g.
         host, port, username, password, etc.
@@ -456,8 +480,7 @@
     - For implementations in separate packages see
       https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
     """
-    kw = {"expand": False}
-    kw.update(kwargs)
+    expand = DEFAULT_EXPAND if expand is None else expand
     out = open_files(
         urlpath=[urlpath],
         mode=mode,
@@ -466,7 +489,8 @@
         errors=errors,
         protocol=protocol,
         newline=newline,
-        **kw,
+        expand=expand,
+        **kwargs,
     )
     if not out:
         raise FileNotFoundError(urlpath)
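`fsspec.open` previously hard-coded `expand=False` into its kwargs; it now takes an explicit `expand` argument whose default comes from the module-level `DEFAULT_EXPAND`, seeded from the `open_expand` config value at import. A sketch of the two behaviors on the in-memory filesystem:

```python
import fsspec

# Default (expand=False): glob characters are taken literally, so this
# really creates a file named "file[1].txt".
with fsspec.open("memory://data/file[1].txt", "wb") as f:
    f.write(b"literal brackets")

# expand=True: the path is treated as a pattern and the first match opened.
with fsspec.open("memory://data/file*", "rb", expand=True) as f:
    print(f.read())  # b"literal brackets"

# The process-wide default can also be flipped at runtime:
# fsspec.core.DEFAULT_EXPAND = True
```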
fsspec/exceptions.py CHANGED
@@ -1,6 +1,7 @@
 """
 fsspec user-defined exception classes
 """
+
 import asyncio


fsspec/generic.py CHANGED
@@ -139,7 +139,7 @@ def rsync(
     source_files, target_files = zip(*allfiles.items())
     fs.cp(source_files, target_files, **kwargs)
     logger.debug(f"{len(to_delete)} files to delete")
-    if delete_missing:
+    if delete_missing and to_delete:
         fs.rm(to_delete)
     return allfiles

fsspec/gui.py CHANGED
@@ -94,7 +94,7 @@ class SigSlot:
         try:
             return self.panel._repr_mimebundle_(*args, **kwargs)
         except (ValueError, AttributeError):
-            raise NotImplementedError("Panel does not seem to be set " "up properly")
+            raise NotImplementedError("Panel does not seem to be set up properly")

     def connect(self, signal, slot):
         """Associate call back with given event
fsspec/implementations/arrow.py CHANGED
@@ -139,8 +139,6 @@ class ArrowFSWrapper(AbstractFileSystem):
         path2 = self._strip_protocol(path2).rstrip("/")
         self.fs.move(path1, path2)

-    mv_file = mv
-
     @wrap_exceptions
     def rm_file(self, path):
         path = self._strip_protocol(path)
fsspec/implementations/cache_mapper.py CHANGED
@@ -12,8 +12,7 @@ class AbstractCacheMapper(abc.ABC):
     """

     @abc.abstractmethod
-    def __call__(self, path: str) -> str:
-        ...
+    def __call__(self, path: str) -> str: ...

     def __eq__(self, other: object) -> bool:
         # Identity only depends on class. When derived classes have attributes
fsspec/implementations/cache_metadata.py CHANGED
@@ -57,10 +57,14 @@ class CacheMetadata:
         """Low-level function to load metadata from specific file"""
         try:
             with open(fn, "r") as f:
-                return json.load(f)
+                loaded = json.load(f)
         except ValueError:
             with open(fn, "rb") as f:
-                return pickle.load(f)
+                loaded = pickle.load(f)
+        for c in loaded.values():
+            if isinstance(c.get("blocks"), list):
+                c["blocks"] = set(c["blocks"])
+        return loaded

     def _save(self, metadata_to_save: Detail, fn: str) -> None:
         """Low-level function to save metadata to specific file"""
@@ -152,11 +156,7 @@ class CacheMetadata:
         for fn, _, _ in self._scan_locations():
             if os.path.exists(fn):
                 # TODO: consolidate blocks here
-                loaded_cached_files = self._load(fn)
-                for c in loaded_cached_files.values():
-                    if isinstance(c["blocks"], list):
-                        c["blocks"] = set(c["blocks"])
-                cached_files.append(loaded_cached_files)
+                cached_files.append(self._load(fn))
             else:
                 cached_files.append({})
         self.cached_files = cached_files or [{}]
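Folding the list-to-set conversion into `_load` keeps every caller consistent; the conversion exists because JSON has no set type, so each file's `"blocks"` set is serialized as a list and must be rebuilt on load. A standalone illustration with plain `json`, independent of fsspec's actual save format:

```python
import json

detail = {"/remote/file": {"blocks": {0, 1, 4}, "uid": "abc"}}

# Saving: sets are not JSON-serializable, so persist them as lists.
text = json.dumps(detail, default=list)

# Loading: the same normalization CacheMetadata._load now performs.
loaded = json.loads(text)
for c in loaded.values():
    if isinstance(c.get("blocks"), list):
        c["blocks"] = set(c["blocks"])

assert loaded["/remote/file"]["blocks"] == {0, 1, 4}
```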
fsspec/implementations/cached.py CHANGED
@@ -425,7 +425,6 @@ class CachingFileSystem(AbstractFileSystem):
             "clear_cache",
             "clear_expired_cache",
             "pop_from_cache",
-            "_mkcache",
             "local_file",
             "_paths_from_path",
             "get_mapper",
@@ -435,12 +434,10 @@ class CachingFileSystem(AbstractFileSystem):
             "__hash__",
             "__eq__",
             "to_json",
+            "to_dict",
             "cache_size",
             "pipe_file",
             "pipe",
-            "isdir",
-            "isfile",
-            "exists",
             "start_transaction",
             "end_transaction",
         }:
@@ -510,15 +507,6 @@ class CachingFileSystem(AbstractFileSystem):
             ^ hash(self.target_protocol)
         )

-    def to_json(self):
-        """Calculate JSON representation.
-
-        Not implemented yet for CachingFileSystem.
-        """
-        raise NotImplementedError(
-            "CachingFileSystem JSON representation not implemented"
-        )
-

 class WholeFileCacheFileSystem(CachingFileSystem):
     """Caches whole remote files on first access
fsspec/implementations/dirfs.py CHANGED
@@ -329,8 +329,8 @@ class DirFileSystem(AsyncFileSystem):
     def rmdir(self, path):
         return self.fs.rmdir(self._join(path))

-    def mv_file(self, path1, path2, **kwargs):
-        return self.fs.mv_file(
+    def mv(self, path1, path2, **kwargs):
+        return self.fs.mv(
             self._join(path1),
             self._join(path2),
             **kwargs,
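Overriding `mv` (the generic entry point) instead of only `mv_file` means moves through a `DirFileSystem` now delegate to the wrapped filesystem's own `mv`. A small sketch over the in-memory filesystem:

```python
import fsspec

mem = fsspec.filesystem("memory")
mem.pipe("/base/a.txt", b"data")

# "dir" prefixes every path with `path` before delegating to `fs`
dirfs = fsspec.filesystem("dir", path="/base", fs=mem)
dirfs.mv("a.txt", "b.txt")

print(mem.exists("/base/b.txt"))  # True
print(mem.exists("/base/a.txt"))  # False
```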
fsspec/implementations/github.py CHANGED
@@ -1,5 +1,7 @@
 import requests

+import fsspec
+
 from ..spec import AbstractFileSystem
 from ..utils import infer_storage_options
 from .memory import MemoryFile
@@ -225,3 +227,13 @@ class GithubFileSystem(AbstractFileSystem):
             raise FileNotFoundError(path)
         r.raise_for_status()
         return MemoryFile(None, None, r.content)
+
+    def cat(self, path, recursive=False, on_error="raise", **kwargs):
+        paths = self.expand_path(path, recursive=recursive)
+        urls = [
+            self.rurl.format(org=self.org, repo=self.repo, path=u, sha=self.root)
+            for u in paths
+        ]
+        fs = fsspec.filesystem("http")
+        data = fs.cat(urls, on_error="return")
+        return {u: v for ((k, v), u) in zip(data.items(), urls)}
fsspec/implementations/http.py CHANGED
@@ -451,7 +451,7 @@ class HTTPFileSystem(AsyncFileSystem):

         ends_with_slash = path.endswith("/")  # _strip_protocol strips trailing slash
         path = self._strip_protocol(path)
-        append_slash_to_dirname = ends_with_slash or path.endswith("/**")
+        append_slash_to_dirname = ends_with_slash or path.endswith(("/**", "/*"))
         idx_star = path.find("*") if path.find("*") >= 0 else len(path)
         idx_brace = path.find("[") if path.find("[") >= 0 else len(path)

@@ -494,15 +494,15 @@ class HTTPFileSystem(AsyncFileSystem):
             pattern = re.compile(pattern)

         out = {
-            p: info
+            (
+                p.rstrip("/")
+                if not append_slash_to_dirname
+                and info["type"] == "directory"
+                and p.endswith("/")
+                else p
+            ): info
             for p, info in sorted(allpaths.items())
-            if pattern.match(
-                (
-                    p + "/"
-                    if append_slash_to_dirname and info["type"] == "directory"
-                    else p
-                )
-            )
+            if pattern.match(p.rstrip("/"))
         }

         if detail: