fsspec 2023.9.2__py3-none-any.whl → 2023.12.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.
fsspec/gui.py CHANGED
@@ -70,7 +70,7 @@ class SigSlot:
         same name.
         """
         if name not in self.signals:
-            raise ValueError("Attempt to assign an undeclared signal: %s" % name)
+            raise ValueError(f"Attempt to assign an undeclared signal: {name}")
         self._sigs[name] = {
             "widget": widget,
             "callbacks": [],
@@ -141,7 +141,7 @@ class SigSlot:

         Calling of callbacks will halt whenever one returns False.
         """
-        logger.log(self._sigs[sig]["log"], "{}: {}".format(sig, value))
+        logger.log(self._sigs[sig]["log"], f"{sig}: {value}")
         for callback in self._sigs[sig]["callbacks"]:
             if isinstance(callback, str):
                 self._emit(callback)
@@ -242,7 +242,7 @@ class FileSelector(SigSlot):
         else:
             self.init_protocol, url = "file", os.getcwd()
         self.init_url = url
-        self.init_kwargs = kwargs or "{}"
+        self.init_kwargs = (kwargs if isinstance(kwargs, str) else str(kwargs)) or "{}"
         self.filters = filters
         self.ignore = [re.compile(i) for i in ignore or []]
         self._fs = None
@@ -319,7 +319,7 @@ class FileSelector(SigSlot):
     def urlpath(self):
         """URL of currently selected item"""
         return (
-            (self.protocol.value + "://" + self.main.value[0])
+            (f"{self.protocol.value}://{self.main.value[0]}")
            if self.main.value
            else None
         )
fsspec/implementations/cached.py CHANGED
@@ -17,6 +17,7 @@ from fsspec.exceptions import BlocksizeMismatchError
 from fsspec.implementations.cache_mapper import create_cache_mapper
 from fsspec.implementations.cache_metadata import CacheMetadata
 from fsspec.spec import AbstractBufferedFile
+from fsspec.transaction import Transaction
 from fsspec.utils import infer_compression

 if TYPE_CHECKING:
@@ -25,6 +26,16 @@ if TYPE_CHECKING:
 logger = logging.getLogger("fsspec.cached")


+class WriteCachedTransaction(Transaction):
+    def complete(self, commit=True):
+        rpaths = [f.path for f in self.files]
+        lpaths = [f.fn for f in self.files]
+        if commit:
+            self.fs.put(lpaths, rpaths)
+        # else remove?
+        self.fs._intrans = False
+
+
 class CachingFileSystem(AbstractFileSystem):
     """Locally caching filesystem, layer over any other FS

@@ -128,6 +139,11 @@ class CachingFileSystem(AbstractFileSystem):
         self.expiry = expiry_time
         self.compression = compression

+        # Size of cache in bytes. If None then the size is unknown and will be
+        # recalculated the next time cache_size() is called. On writes to the
+        # cache this is reset to None.
+        self._cache_size = None
+
         if same_names is not None and cache_mapper is not None:
             raise ValueError(
                 "Cannot specify both same_names and cache_mapper in "
@@ -165,6 +181,17 @@ class CachingFileSystem(AbstractFileSystem):
     def _mkcache(self):
         os.makedirs(self.storage[-1], exist_ok=True)

+    def cache_size(self):
+        """Return size of cache in bytes.
+
+        If more than one cache directory is in use, only the size of the last
+        one (the writable cache directory) is returned.
+        """
+        if self._cache_size is None:
+            cache_dir = self.storage[-1]
+            self._cache_size = filesystem("file").du(cache_dir, withdirs=True)
+        return self._cache_size
+
     def load_cache(self):
         """Read set of stored blocks from file"""
         self._metadata.load()
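
The new cache_size() helper makes the on-disk footprint of the writable cache observable, memoizing the result in _cache_size until the next cache write resets it. A minimal usage sketch (the target URL and cache directory are hypothetical):

    import fsspec

    # hypothetical remote file and local cache location
    fs = fsspec.filesystem(
        "filecache", target_protocol="https", cache_storage="/tmp/demo-cache"
    )
    with fs.open("https://example.org/data.bin") as f:
        f.read()  # populates the cache
    print(fs.cache_size())  # bytes used by the writable cache directory
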
@@ -176,6 +203,7 @@ class CachingFileSystem(AbstractFileSystem):
         self._mkcache()
         self._metadata.save()
         self.last_cache = time.time()
+        self._cache_size = None

     def _check_cache(self):
         """Reload caches if time elapsed or any disappeared"""
@@ -202,6 +230,7 @@ class CachingFileSystem(AbstractFileSystem):
         """
         rmtree(self.storage[-1])
         self.load_cache()
+        self._cache_size = None

     def clear_expired_cache(self, expiry_time=None):
         """Remove all expired files and metadata from the cache
@@ -231,6 +260,8 @@ class CachingFileSystem(AbstractFileSystem):
             rmtree(self.storage[-1])
             self.load_cache()

+        self._cache_size = None
+
     def pop_from_cache(self, path):
         """Remove cached version of given file

@@ -242,6 +273,7 @@ class CachingFileSystem(AbstractFileSystem):
         fn = self._metadata.pop_file(path)
         if fn is not None:
             os.remove(fn)
+        self._cache_size = None

     def _open(
         self,
@@ -283,10 +315,10 @@ class CachingFileSystem(AbstractFileSystem):
             hash, blocks = detail["fn"], detail["blocks"]
             if blocks is True:
                 # stored file is complete
-                logger.debug("Opening local copy of %s" % path)
+                logger.debug("Opening local copy of %s", path)
                 return open(fn, mode)
             # TODO: action where partial file exists in read-only cache
-            logger.debug("Opening partially cached copy of %s" % path)
+            logger.debug("Opening partially cached copy of %s", path)
         else:
             hash = self._mapper(path)
             fn = os.path.join(self.storage[-1], hash)
@@ -299,7 +331,7 @@ class CachingFileSystem(AbstractFileSystem):
                 "uid": self.fs.ukey(path),
             }
             self._metadata.update_file(path, detail)
-            logger.debug("Creating local sparse file for %s" % path)
+            logger.debug("Creating local sparse file for %s", path)

         # call target filesystems open
         self._mkcache()
@@ -322,9 +354,9 @@ class CachingFileSystem(AbstractFileSystem):
         if "blocksize" in detail:
             if detail["blocksize"] != f.blocksize:
                 raise BlocksizeMismatchError(
-                    "Cached file must be reopened with same block"
-                    "size as original (old: %i, new %i)"
-                    "" % (detail["blocksize"], f.blocksize)
+                    f"Cached file must be reopened with same block"
+                    f" size as original (old: {detail['blocksize']},"
+                    f" new {f.blocksize})"
                 )
         else:
             detail["blocksize"] = f.blocksize
@@ -334,6 +366,9 @@ class CachingFileSystem(AbstractFileSystem):
             self.save_cache()
         return f

+    def _parent(self, path):
+        return self.fs._parent(path)
+
     def hash_name(self, path: str, *args: Any) -> str:
         # Kept for backward compatibility with downstream libraries.
         # Ignores extra arguments, previously same_name boolean.
@@ -369,6 +404,7 @@ class CachingFileSystem(AbstractFileSystem):
             "open",
             "cat",
             "cat_file",
+            "cat_ranges",
             "get",
             "read_block",
             "tail",
@@ -389,6 +425,11 @@ class CachingFileSystem(AbstractFileSystem):
             "__hash__",
             "__eq__",
             "to_json",
+            "cache_size",
+            "pipe_file",
+            "pipe",
+            "start_transaction",
+            "end_transaction",
         ]:
             # all the methods defined in this class. Note `open` here, since
             # it calls `_open`, but is actually in superclass
@@ -397,7 +438,10 @@ class CachingFileSystem(AbstractFileSystem):
         )
         if item in ["__reduce_ex__"]:
             raise AttributeError
-        if item in ["_cache"]:
+        if item in ["transaction"]:
+            # property
+            return type(self).transaction.__get__(self)
+        if item in ["_cache", "transaction_type"]:
             # class attributes
             return getattr(type(self), item)
         if item == "__class__":
@@ -486,7 +530,13 @@ class WholeFileCacheFileSystem(CachingFileSystem):
             self._mkcache()
         else:
             return [
-                LocalTempFile(self.fs, path, mode=open_files.mode) for path in paths
+                LocalTempFile(
+                    self.fs,
+                    path,
+                    mode=open_files.mode,
+                    fn=os.path.join(self.storage[-1], self._mapper(path)),
+                )
+                for path in paths
             ]

         if self.compression:
@@ -535,6 +585,7 @@ class WholeFileCacheFileSystem(CachingFileSystem):
                 os.remove(f.name)
             except FileNotFoundError:
                 pass
+        self._cache_size = None

     def _make_local_details(self, path):
         hash = self._mapper(path)
@@ -547,7 +598,7 @@ class WholeFileCacheFileSystem(CachingFileSystem):
             "uid": self.fs.ukey(path),
         }
         self._metadata.update_file(path, detail)
-        logger.debug("Copying %s to local cache" % path)
+        logger.debug("Copying %s to local cache", path)
         return fn

     def cat(
@@ -598,13 +649,14 @@ class WholeFileCacheFileSystem(CachingFileSystem):
     def _open(self, path, mode="rb", **kwargs):
         path = self._strip_protocol(path)
         if "r" not in mode:
-            return LocalTempFile(self, path, mode=mode)
+            fn = self._make_local_details(path)
+            return LocalTempFile(self, path, mode=mode, fn=fn)
         detail = self._check_file(path)
         if detail:
             detail, fn = detail
             _, blocks = detail["fn"], detail["blocks"]
             if blocks is True:
-                logger.debug("Opening local copy of %s" % path)
+                logger.debug("Opening local copy of %s", path)

                 # In order to support downstream filesystems to be able to
                 # infer the compression from the original filename, like
@@ -616,8 +668,8 @@ class WholeFileCacheFileSystem(CachingFileSystem):
                 return f
             else:
                 raise ValueError(
-                    "Attempt to open partially cached file %s"
-                    "as a wholly cached file" % path
+                    f"Attempt to open partially cached file {path}"
+                    f" as a wholly cached file"
                 )
         else:
             fn = self._make_local_details(path)
@@ -665,6 +717,7 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):

     protocol = "simplecache"
     local_file = True
+    transaction_type = WriteCachedTransaction

     def __init__(self, **kwargs):
         kw = kwargs.copy()
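
With transaction_type now set to WriteCachedTransaction, writes through "simplecache" can be deferred: inside a transaction, files are written only to the local cache and uploaded in one batch when the transaction completes. A sketch, assuming a memory target and hypothetical paths:

    import fsspec

    fs = fsspec.filesystem("simplecache", target_protocol="memory")
    with fs.transaction:
        with fs.open("/staged/a.bin", "wb") as f:
            f.write(b"payload")  # lands in the local cache only
    # on exit, WriteCachedTransaction.complete() calls fs.put(lpaths, rpaths)
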
@@ -689,21 +742,52 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
     def load_cache(self):
         pass

+    def pipe_file(self, path, value=None, **kwargs):
+        if self._intrans:
+            with self.open(path, "wb") as f:
+                f.write(value)
+        else:
+            super().pipe_file(path, value)
+
+    def pipe(self, path, value=None, **kwargs):
+        if isinstance(path, str):
+            self.pipe_file(self._strip_protocol(path), value, **kwargs)
+        elif isinstance(path, dict):
+            for k, v in path.items():
+                self.pipe_file(self._strip_protocol(k), v, **kwargs)
+        else:
+            raise ValueError("path must be str or dict")
+
+    def cat_ranges(
+        self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
+    ):
+        lpaths = [self._check_file(p) for p in paths]
+        rpaths = [p for l, p in zip(lpaths, paths) if l is False]
+        lpaths = [l for l, p in zip(lpaths, paths) if l is False]
+        self.fs.get(rpaths, lpaths)
+        return super().cat_ranges(
+            paths, starts, ends, max_gap=max_gap, on_error=on_error, **kwargs
+        )
+
     def _open(self, path, mode="rb", **kwargs):
         path = self._strip_protocol(path)
+        sha = self._mapper(path)

         if "r" not in mode:
-            return LocalTempFile(self, path, mode=mode)
+            fn = os.path.join(self.storage[-1], sha)
+            return LocalTempFile(
+                self, path, mode=mode, autocommit=not self._intrans, fn=fn
+            )
         fn = self._check_file(path)
         if fn:
             return open(fn, mode)

-        sha = self._mapper(path)
         fn = os.path.join(self.storage[-1], sha)
-        logger.debug("Copying %s to local cache" % path)
+        logger.debug("Copying %s to local cache", path)
         kwargs["mode"] = mode

         self._mkcache()
+        self._cache_size = None
         if self.compression:
             with self.fs._open(path, **kwargs) as f, open(fn, "wb") as f2:
                 if isinstance(f, AbstractBufferedFile):
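
pipe() now routes through the transaction-aware pipe_file(), so dict-style writes also respect an open transaction, and cat_ranges() first downloads any files not yet in the cache (those for which _check_file returned False) before delegating to the parent implementation. Continuing the sketch above, with hypothetical paths:

    fs.pipe({"/staged/x.bin": b"1", "/staged/y.bin": b"2"})  # each entry via pipe_file
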
@@ -728,13 +812,9 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
 class LocalTempFile:
     """A temporary local file, which will be uploaded on commit"""

-    def __init__(self, fs, path, fn=None, mode="wb", autocommit=True, seek=0):
-        if fn:
-            self.fn = fn
-            self.fh = open(fn, mode)
-        else:
-            fd, self.fn = tempfile.mkstemp()
-            self.fh = open(fd, mode)
+    def __init__(self, fs, path, fn, mode="wb", autocommit=True, seek=0):
+        self.fn = fn
+        self.fh = open(fn, mode)
         self.mode = mode
         if seek:
             self.fh.seek(seek)
@@ -744,10 +824,10 @@ class LocalTempFile:
         self.autocommit = autocommit

     def __reduce__(self):
-        # always open in rb+ to allow continuing writing at a location
+        # always open in r+b to allow continuing writing at a location
         return (
             LocalTempFile,
-            (self.fs, self.path, self.fn, "rb+", self.autocommit, self.tell()),
+            (self.fs, self.path, self.fn, "r+b", self.autocommit, self.tell()),
         )

     def __enter__(self):
fsspec/implementations/data.py ADDED
@@ -0,0 +1,48 @@
+import base64
+import io
+from urllib.parse import unquote
+
+from fsspec import AbstractFileSystem
+
+
+class DataFileSystem(AbstractFileSystem):
+    """A handy decoder for data-URLs
+
+    Example
+    -------
+    >>> with fsspec.open("data:,Hello%2C%20World%21") as f:
+    ...     print(f.read())
+    b"Hello, World!"
+
+    """
+
+    protocol = "data"
+
+    def __init__(self, **kwargs):
+        """No parameters for this filesystem"""
+        super().__init__(**kwargs)
+
+    def cat_file(self, path, start=None, end=None, **kwargs):
+        pref, data = path.split(",", 1)
+        if pref.endswith("base64"):
+            return base64.b64decode(data)[start:end]
+        return unquote(data).encode()[start:end]
+
+    def info(self, path, **kwargs):
+        pref, name = path.split(",", 1)
+        data = self.cat_file(path)
+        mime = pref.split(":", 1)[1].split(";", 1)[0]
+        return {"name": name, "size": len(data), "type": "file", "mimetype": mime}
+
+    def _open(
+        self,
+        path,
+        mode="rb",
+        block_size=None,
+        autocommit=True,
+        cache_options=None,
+        **kwargs,
+    ):
+        if "r" not in mode:
+            raise ValueError("Read only filesystem")
+        return io.BytesIO(self.cat_file(path))
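
The new "data" protocol decodes RFC 2397 data URLs entirely in memory. Mirroring the docstring example, plus a base64 variant (the payloads are illustrative):

    import fsspec

    with fsspec.open("data:,Hello%2C%20World%21") as f:
        print(f.read())  # b'Hello, World!'

    fs = fsspec.filesystem("data")
    print(fs.cat_file("data:text/plain;base64,SGVsbG8="))  # b'Hello'
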
fsspec/implementations/ftp.py CHANGED
@@ -57,7 +57,7 @@ class FTPFileSystem(AbstractFileSystem):
         encoding: str
             Encoding to use for directories and filenames in FTP connection
         """
-        super(FTPFileSystem, self).__init__(**kwargs)
+        super().__init__(**kwargs)
         self.host = host
         self.port = port
         self.tempdir = tempdir or "/tmp"
@@ -156,7 +156,7 @@ class FTPFileSystem(AbstractFileSystem):
                 outfile.write(x)

             self.ftp.retrbinary(
-                "RETR %s" % rpath,
+                f"RETR {rpath}",
                 blocksize=self.blocksize,
                 callback=cb,
             )
@@ -172,7 +172,7 @@ class FTPFileSystem(AbstractFileSystem):
             out.append(x)

         self.ftp.retrbinary(
-            "RETR %s" % path,
+            f"RETR {path}",
             blocksize=self.blocksize,
             rest=start,
             callback=cb,
@@ -252,7 +252,7 @@ class FTPFileSystem(AbstractFileSystem):
             self.dircache.clear()
         else:
             self.dircache.pop(path, None)
-        super(FTPFileSystem, self).invalidate_cache(path)
+        super().invalidate_cache(path)


 class TransferDone(Exception):
@@ -321,7 +321,7 @@ class FTPFile(AbstractBufferedFile):

         try:
             self.fs.ftp.retrbinary(
-                "RETR %s" % self.path,
+                f"RETR {self.path}",
                 blocksize=self.blocksize,
                 rest=start,
                 callback=callback,
@@ -339,7 +339,7 @@ class FTPFile(AbstractBufferedFile):
     def _upload_chunk(self, final=False):
         self.buffer.seek(0)
         self.fs.ftp.storbinary(
-            "STOR " + self.path, self.buffer, blocksize=self.blocksize, rest=self.offset
+            f"STOR {self.path}", self.buffer, blocksize=self.blocksize, rest=self.offset
         )
         return True

fsspec/implementations/git.py CHANGED
@@ -81,7 +81,7 @@ class GitFileSystem(AbstractFileSystem):
                             "type": "directory",
                             "name": "/".join([path, obj.name]).lstrip("/"),
                             "hex": obj.hex,
-                            "mode": "%o" % obj.filemode,
+                            "mode": f"{obj.filemode:o}",
                             "size": 0,
                         }
                     )
@@ -91,7 +91,7 @@ class GitFileSystem(AbstractFileSystem):
                             "type": "file",
                             "name": "/".join([path, obj.name]).lstrip("/"),
                             "hex": obj.hex,
-                            "mode": "%o" % obj.filemode,
+                            "mode": f"{obj.filemode:o}",
                             "size": obj.size,
                         }
                     )
@@ -102,7 +102,7 @@ class GitFileSystem(AbstractFileSystem):
                     "type": "file",
                     "name": obj.name,
                     "hex": obj.hex,
-                    "mode": "%o" % obj.filemode,
+                    "mode": f"{obj.filemode:o}",
                     "size": obj.size,
                 }
             ]
fsspec/implementations/github.py CHANGED
@@ -79,9 +79,7 @@ class GithubFileSystem(AbstractFileSystem):
         List of string
         """
         r = requests.get(
-            "https://api.github.com/{part}/{org}/repos".format(
-                part=["users", "orgs"][is_org], org=org_or_user
-            )
+            f"https://api.github.com/{['users', 'orgs'][is_org]}/{org_or_user}/repos"
         )
         r.raise_for_status()
         return [repo["name"] for repo in r.json()]
@@ -90,8 +88,7 @@ class GithubFileSystem(AbstractFileSystem):
     def tags(self):
         """Names of tags in the repo"""
         r = requests.get(
-            "https://api.github.com/repos/{org}/{repo}/tags"
-            "".format(org=self.org, repo=self.repo),
+            f"https://api.github.com/repos/{self.org}/{self.repo}/tags",
             **self.kw,
         )
         r.raise_for_status()
@@ -101,8 +98,7 @@ class GithubFileSystem(AbstractFileSystem):
     def branches(self):
         """Names of branches in the repo"""
         r = requests.get(
-            "https://api.github.com/repos/{org}/{repo}/branches"
-            "".format(org=self.org, repo=self.repo),
+            f"https://api.github.com/repos/{self.org}/{self.repo}/branches",
             **self.kw,
         )
         r.raise_for_status()
fsspec/implementations/http.py CHANGED
@@ -14,7 +14,13 @@ from fsspec.asyn import AbstractAsyncStreamedFile, AsyncFileSystem, sync, sync_w
 from fsspec.callbacks import _DEFAULT_CALLBACK
 from fsspec.exceptions import FSTimeoutError
 from fsspec.spec import AbstractBufferedFile
-from fsspec.utils import DEFAULT_BLOCK_SIZE, isfilelike, nullcontext, tokenize
+from fsspec.utils import (
+    DEFAULT_BLOCK_SIZE,
+    glob_translate,
+    isfilelike,
+    nullcontext,
+    tokenize,
+)

 from ..caching import AllBytes

@@ -165,7 +171,7 @@ class HTTPFileSystem(AsyncFileSystem):
                 l = l[1]
             if l.startswith("/") and len(l) > 1:
                 # absolute URL on this server
-                l = parts.scheme + "://" + parts.netloc + l
+                l = f"{parts.scheme}://{parts.netloc}{l}"
             if l.startswith("http"):
                 if self.same_schema and l.startswith(url.rstrip("/") + "/"):
                     out.add(l)
@@ -441,8 +447,9 @@ class HTTPFileSystem(AsyncFileSystem):
             raise ValueError("maxdepth must be at least 1")
         import re

-        ends = path.endswith("/")
+        ends_with_slash = path.endswith("/")  # _strip_protocol strips trailing slash
         path = self._strip_protocol(path)
+        append_slash_to_dirname = ends_with_slash or path.endswith("/**")
         idx_star = path.find("*") if path.find("*") >= 0 else len(path)
         idx_brace = path.find("[") if path.find("[") >= 0 else len(path)

@@ -451,11 +458,11 @@ class HTTPFileSystem(AsyncFileSystem):
         detail = kwargs.pop("detail", False)

         if not has_magic(path):
-            if await self._exists(path):
+            if await self._exists(path, **kwargs):
                 if not detail:
                     return [path]
                 else:
-                    return {path: await self._info(path)}
+                    return {path: await self._info(path, **kwargs)}
             else:
                 if not detail:
                     return []  # glob of non-existent returns empty
@@ -480,45 +487,22 @@ class HTTPFileSystem(AsyncFileSystem):
         allpaths = await self._find(
             root, maxdepth=depth, withdirs=True, detail=True, **kwargs
         )
-        # Escape characters special to python regex, leaving our supported
-        # special characters in place.
-        # See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
-        # for shell globbing details.
-        pattern = (
-            "^"
-            + (
-                path.replace("\\", r"\\")
-                .replace(".", r"\.")
-                .replace("+", r"\+")
-                .replace("//", "/")
-                .replace("(", r"\(")
-                .replace(")", r"\)")
-                .replace("|", r"\|")
-                .replace("^", r"\^")
-                .replace("$", r"\$")
-                .replace("{", r"\{")
-                .replace("}", r"\}")
-                .rstrip("/")
-            )
-            + "$"
-        )
-        pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern)
-        pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern)
-        pattern = re.sub("[*]", "[^/]*", pattern)
-        pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern)
-        pattern = re.sub("=DOUBLE_STARS=", ".*", pattern)
+
+        pattern = glob_translate(path + ("/" if ends_with_slash else ""))
         pattern = re.compile(pattern)
+
         out = {
-            p: allpaths[p]
-            for p in sorted(allpaths)
-            if pattern.match(p.replace("//", "/").rstrip("/"))
+            p: info
+            for p, info in sorted(allpaths.items())
+            if pattern.match(
+                (
+                    p + "/"
+                    if append_slash_to_dirname and info["type"] == "directory"
+                    else p
+                )
+            )
         }

-        # Return directories only when the glob end by a slash
-        # This is needed for posix glob compliance
-        if ends:
-            out = {k: v for k, v in out.items() if v["type"] == "directory"}
-
         if detail:
             return out
         else:
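
The hand-rolled regex construction is replaced by glob_translate from fsspec.utils, which performs the same shell-glob-to-regex conversion centrally. A sketch of the expected behavior (patterns and paths are illustrative):

    import re

    from fsspec.utils import glob_translate

    pat = re.compile(glob_translate("data/**/*.csv"))
    print(bool(pat.match("data/2023/part-0.csv")))  # expected: True
    print(bool(pat.match("data/part-0.json")))  # expected: False
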
@@ -655,8 +639,8 @@ class HTTPFile(AbstractBufferedFile):
         logger.debug(f"Fetch range for {self}: {start}-{end}")
         kwargs = self.kwargs.copy()
         headers = kwargs.pop("headers", {}).copy()
-        headers["Range"] = "bytes=%i-%i" % (start, end - 1)
-        logger.debug(str(self.url) + " : " + headers["Range"])
+        headers["Range"] = f"bytes={start}-{end - 1}"
+        logger.debug(f"{self.url} : {headers['Range']}")
         r = await self.session.get(
             self.fs.encode_url(self.url), headers=headers, **kwargs
         )
@@ -812,13 +796,13 @@ async def get_range(session, url, start, end, file=None, **kwargs):
     # explicit get a range when we know it must be safe
     kwargs = kwargs.copy()
     headers = kwargs.pop("headers", {}).copy()
-    headers["Range"] = "bytes=%i-%i" % (start, end - 1)
+    headers["Range"] = f"bytes={start}-{end - 1}"
     r = await session.get(url, headers=headers, **kwargs)
     r.raise_for_status()
     async with r:
         out = await r.read()
     if file:
-        with open(file, "rb+") as f:
+        with open(file, "r+b") as f:
             f.seek(start)
             f.write(out)
     else:
@@ -831,7 +815,7 @@ async def _file_info(url, session, size_policy="head", **kwargs):
     Default operation is to explicitly allow redirects and use encoding
     'identity' (no compression) to get the true size of the target.
     """
-    logger.debug("Retrieve file size for %s" % url)
+    logger.debug("Retrieve file size for %s", url)
     kwargs = kwargs.copy()
     ar = kwargs.pop("allow_redirects", True)
     head = kwargs.get("headers", {}).copy()
@@ -844,7 +828,7 @@ async def _file_info(url, session, size_policy="head", **kwargs):
     elif size_policy == "get":
         r = await session.get(url, allow_redirects=ar, **kwargs)
     else:
-        raise TypeError('size_policy must be "head" or "get", got %s' "" % size_policy)
+        raise TypeError(f'size_policy must be "head" or "get", got {size_policy}')
     async with r:
         r.raise_for_status()

@@ -855,7 +839,10 @@ async def _file_info(url, session, size_policy="head", **kwargs):
         if "Content-Length" in r.headers:
             # Some servers may choose to ignore Accept-Encoding and return
             # compressed content, in which case the returned size is unreliable.
-            if r.headers.get("Content-Encoding", "identity") == "identity":
+            if "Content-Encoding" not in r.headers or r.headers["Content-Encoding"] in [
+                "identity",
+                "",
+            ]:
                 info["size"] = int(r.headers["Content-Length"])
         elif "Content-Range" in r.headers:
             info["size"] = int(r.headers["Content-Range"].split("/")[1])