fsspec 2023.10.0__tar.gz → 2023.12.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. {fsspec-2023.10.0/fsspec.egg-info → fsspec-2023.12.0}/PKG-INFO +1 -1
  2. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/_version.py +3 -3
  3. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/asyn.py +34 -44
  4. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/caching.py +1 -1
  5. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/core.py +16 -3
  6. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/generic.py +4 -0
  7. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/gui.py +1 -1
  8. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/cached.py +70 -14
  9. fsspec-2023.12.0/fsspec/implementations/data.py +48 -0
  10. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/http.py +28 -41
  11. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/memory.py +1 -1
  12. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/reference.py +49 -16
  13. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/smb.py +0 -1
  14. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/webhdfs.py +26 -4
  15. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/registry.py +2 -1
  16. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/spec.py +36 -48
  17. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/tests/abstract/common.py +5 -5
  18. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/transaction.py +8 -4
  19. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/utils.py +114 -1
  20. {fsspec-2023.10.0 → fsspec-2023.12.0/fsspec.egg-info}/PKG-INFO +1 -1
  21. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec.egg-info/SOURCES.txt +1 -0
  22. {fsspec-2023.10.0 → fsspec-2023.12.0}/pyproject.toml +1 -1
  23. {fsspec-2023.10.0 → fsspec-2023.12.0}/setup.cfg +2 -2
  24. {fsspec-2023.10.0 → fsspec-2023.12.0}/LICENSE +0 -0
  25. {fsspec-2023.10.0 → fsspec-2023.12.0}/MANIFEST.in +0 -0
  26. {fsspec-2023.10.0 → fsspec-2023.12.0}/README.md +0 -0
  27. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/__init__.py +0 -0
  28. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/archive.py +0 -0
  29. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/callbacks.py +0 -0
  30. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/compression.py +0 -0
  31. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/config.py +0 -0
  32. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/conftest.py +0 -0
  33. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/dircache.py +0 -0
  34. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/exceptions.py +0 -0
  35. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/fuse.py +0 -0
  36. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/__init__.py +0 -0
  37. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/arrow.py +0 -0
  38. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/cache_mapper.py +0 -0
  39. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/cache_metadata.py +0 -0
  40. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/dask.py +0 -0
  41. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/dbfs.py +0 -0
  42. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/dirfs.py +0 -0
  43. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/ftp.py +0 -0
  44. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/git.py +0 -0
  45. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/github.py +0 -0
  46. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/jupyter.py +0 -0
  47. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/libarchive.py +0 -0
  48. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/local.py +0 -0
  49. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/sftp.py +0 -0
  50. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/tar.py +0 -0
  51. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/zip.py +0 -0
  52. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/mapping.py +0 -0
  53. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/parquet.py +0 -0
  54. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/tests/abstract/__init__.py +0 -0
  55. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/tests/abstract/copy.py +0 -0
  56. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/tests/abstract/get.py +0 -0
  57. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/tests/abstract/put.py +0 -0
  58. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec.egg-info/dependency_links.txt +0 -0
  59. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec.egg-info/not-zip-safe +0 -0
  60. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec.egg-info/requires.txt +0 -0
  61. {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec.egg-info/top_level.txt +0 -0
  62. {fsspec-2023.10.0 → fsspec-2023.12.0}/requirements.txt +0 -0
  63. {fsspec-2023.10.0 → fsspec-2023.12.0}/setup.py +0 -0
  64. {fsspec-2023.10.0 → fsspec-2023.12.0}/versioneer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: fsspec
3
- Version: 2023.10.0
3
+ Version: 2023.12.0
4
4
  Summary: File-system specification
5
5
  Home-page: https://github.com/fsspec/filesystem_spec
6
6
  Maintainer: Martin Durant
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2023-10-21T13:35:51-0400",
11
+ "date": "2023-12-02T20:51:30-0500",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "e20f626b87b5bb87d223495a56aefd768272a7ca",
15
- "version": "2023.10.0"
14
+ "full-revisionid": "5cf9cd952c5d276835d3caef9c32fcf69d55b10c",
15
+ "version": "2023.12.0"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -15,7 +15,7 @@ from .callbacks import _DEFAULT_CALLBACK
15
15
  from .exceptions import FSTimeoutError
16
16
  from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep
17
17
  from .spec import AbstractBufferedFile, AbstractFileSystem
18
- from .utils import is_exception, other_paths
18
+ from .utils import glob_translate, is_exception, other_paths
19
19
 
20
20
  private = re.compile("_[^_]")
21
21
  iothread = [None] # dedicated fsspec IO thread
@@ -106,7 +106,7 @@ def sync(loop, func, *args, timeout=None, **kwargs):
106
106
 
107
107
 
108
108
  def sync_wrapper(func, obj=None):
109
- """Given a function, make so can be called in async or blocking contexts
109
+ """Given a function, make so can be called in blocking contexts
110
110
 
111
111
  Leave obj=None if defining within a class. Pass the instance if attaching
112
112
  as an attribute of the instance.
@@ -467,6 +467,16 @@ class AsyncFileSystem(AbstractFileSystem):
467
467
  on_error="return",
468
468
  **kwargs,
469
469
  ):
470
+ """Get the contents of byte ranges from one or more files
471
+
472
+ Parameters
473
+ ----------
474
+ paths: list
475
+ A list of of filepaths on this filesystems
476
+ starts, ends: int or list
477
+ Bytes limits of the read. If using a single int, the same value will be
478
+ used to read all the specified files.
479
+ """
470
480
  # TODO: on_error
471
481
  if max_gap is not None:
472
482
  # use utils.merge_offset_ranges
@@ -476,7 +486,7 @@ class AsyncFileSystem(AbstractFileSystem):
476
486
  if not isinstance(starts, Iterable):
477
487
  starts = [starts] * len(paths)
478
488
  if not isinstance(ends, Iterable):
479
- ends = [starts] * len(paths)
489
+ ends = [ends] * len(paths)
480
490
  if len(starts) != len(paths) or len(ends) != len(paths):
481
491
  raise ValueError
482
492
  coros = [
@@ -662,9 +672,9 @@ class AsyncFileSystem(AbstractFileSystem):
662
672
  [self._size(p) for p in paths], batch_size=batch_size
663
673
  )
664
674
 
665
- async def _exists(self, path):
675
+ async def _exists(self, path, **kwargs):
666
676
  try:
667
- await self._info(path)
677
+ await self._info(path, **kwargs)
668
678
  return True
669
679
  except FileNotFoundError:
670
680
  return False
@@ -735,8 +745,12 @@ class AsyncFileSystem(AbstractFileSystem):
735
745
 
736
746
  import re
737
747
 
738
- ends = path.endswith("/")
748
+ seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
749
+ ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash
739
750
  path = self._strip_protocol(path)
751
+ append_slash_to_dirname = ends_with_sep or path.endswith(
752
+ tuple(sep + "**" for sep in seps)
753
+ )
740
754
  idx_star = path.find("*") if path.find("*") >= 0 else len(path)
741
755
  idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
742
756
  idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
@@ -746,11 +760,11 @@ class AsyncFileSystem(AbstractFileSystem):
746
760
  detail = kwargs.pop("detail", False)
747
761
 
748
762
  if not has_magic(path):
749
- if await self._exists(path):
763
+ if await self._exists(path, **kwargs):
750
764
  if not detail:
751
765
  return [path]
752
766
  else:
753
- return {path: await self._info(path)}
767
+ return {path: await self._info(path, **kwargs)}
754
768
  else:
755
769
  if not detail:
756
770
  return [] # glob of non-existent returns empty
@@ -775,46 +789,22 @@ class AsyncFileSystem(AbstractFileSystem):
775
789
  allpaths = await self._find(
776
790
  root, maxdepth=depth, withdirs=True, detail=True, **kwargs
777
791
  )
778
- # Escape characters special to python regex, leaving our supported
779
- # special characters in place.
780
- # See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
781
- # for shell globbing details.
782
- pattern = (
783
- "^"
784
- + (
785
- path.replace("\\", r"\\")
786
- .replace(".", r"\.")
787
- .replace("+", r"\+")
788
- .replace("//", "/")
789
- .replace("(", r"\(")
790
- .replace(")", r"\)")
791
- .replace("|", r"\|")
792
- .replace("^", r"\^")
793
- .replace("$", r"\$")
794
- .replace("{", r"\{")
795
- .replace("}", r"\}")
796
- .rstrip("/")
797
- .replace("?", ".")
798
- )
799
- + "$"
800
- )
801
- pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern)
802
- pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern)
803
- pattern = re.sub("[*]", "[^/]*", pattern)
804
- pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern)
805
- pattern = re.sub("=DOUBLE_STARS=", ".*", pattern)
792
+
793
+ pattern = glob_translate(path + ("/" if ends_with_sep else ""))
806
794
  pattern = re.compile(pattern)
795
+
807
796
  out = {
808
- p: allpaths[p]
809
- for p in sorted(allpaths)
810
- if pattern.match(p.replace("//", "/").rstrip("/"))
797
+ p: info
798
+ for p, info in sorted(allpaths.items())
799
+ if pattern.match(
800
+ (
801
+ p + "/"
802
+ if append_slash_to_dirname and info["type"] == "directory"
803
+ else p
804
+ )
805
+ )
811
806
  }
812
807
 
813
- # Return directories only when the glob end by a slash
814
- # This is needed for posix glob compliance
815
- if ends:
816
- out = {k: v for k, v in out.items() if v["type"] == "directory"}
817
-
818
808
  if detail:
819
809
  return out
820
810
  else:
@@ -111,7 +111,7 @@ class MMapCache(BaseCache):
111
111
  fd.write(b"1")
112
112
  fd.flush()
113
113
  else:
114
- fd = open(self.location, "rb+")
114
+ fd = open(self.location, "r+b")
115
115
 
116
116
  return mmap.mmap(fd.fileno(), self.size)
117
117
 
@@ -1,8 +1,11 @@
1
+ from __future__ import annotations
2
+
1
3
  import io
2
4
  import logging
3
5
  import os
4
6
  import re
5
7
  from glob import has_magic
8
+ from pathlib import Path
6
9
 
7
10
  # for backwards compat, we export cache things from here too
8
11
  from .caching import ( # noqa: F401
@@ -290,7 +293,11 @@ def open_files(
290
293
  fs.auto_mkdir = auto_mkdir
291
294
  elif "r" not in mode and auto_mkdir:
292
295
  parents = {fs._parent(path) for path in paths}
293
- [fs.makedirs(parent, exist_ok=True) for parent in parents]
296
+ for parent in parents:
297
+ try:
298
+ fs.makedirs(parent, exist_ok=True)
299
+ except PermissionError:
300
+ pass
294
301
  return OpenFiles(
295
302
  [
296
303
  OpenFile(
@@ -465,7 +472,11 @@ def open(
465
472
  return out[0]
466
473
 
467
474
 
468
- def open_local(url, mode="rb", **storage_options):
475
+ def open_local(
476
+ url: str | list[str] | Path | list[Path],
477
+ mode: str = "rb",
478
+ **storage_options: dict,
479
+ ) -> str | list[str]:
469
480
  """Open file(s) which can be resolved to local
470
481
 
471
482
  For files which either are local, or get downloaded upon open
@@ -489,7 +500,7 @@ def open_local(url, mode="rb", **storage_options):
489
500
  )
490
501
  with of as files:
491
502
  paths = [f.name for f in files]
492
- if isinstance(url, str) and not has_magic(url):
503
+ if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path):
493
504
  return paths[0]
494
505
  return paths
495
506
 
@@ -510,6 +521,8 @@ def split_protocol(urlpath):
510
521
  if len(protocol) > 1:
511
522
  # excludes Windows paths
512
523
  return protocol, path
524
+ if ":" in urlpath and urlpath.find(":") > 1:
525
+ return urlpath.split(":", 1)
513
526
  return None, urlpath
514
527
 
515
528
 
@@ -171,6 +171,10 @@ class GenericFileSystem(AsyncFileSystem):
171
171
  self.method = default_method
172
172
  super().__init__(**kwargs)
173
173
 
174
+ def _parent(self, path):
175
+ fs = _resolve_fs(path, self.method)
176
+ return fs.unstrip_protocol(fs._parent(path))
177
+
174
178
  def _strip_protocol(self, path):
175
179
  # normalization only
176
180
  fs = _resolve_fs(path, self.method)
@@ -242,7 +242,7 @@ class FileSelector(SigSlot):
242
242
  else:
243
243
  self.init_protocol, url = "file", os.getcwd()
244
244
  self.init_url = url
245
- self.init_kwargs = kwargs or "{}"
245
+ self.init_kwargs = (kwargs if isinstance(kwargs, str) else str(kwargs)) or "{}"
246
246
  self.filters = filters
247
247
  self.ignore = [re.compile(i) for i in ignore or []]
248
248
  self._fs = None
@@ -17,6 +17,7 @@ from fsspec.exceptions import BlocksizeMismatchError
17
17
  from fsspec.implementations.cache_mapper import create_cache_mapper
18
18
  from fsspec.implementations.cache_metadata import CacheMetadata
19
19
  from fsspec.spec import AbstractBufferedFile
20
+ from fsspec.transaction import Transaction
20
21
  from fsspec.utils import infer_compression
21
22
 
22
23
  if TYPE_CHECKING:
@@ -25,6 +26,16 @@ if TYPE_CHECKING:
25
26
  logger = logging.getLogger("fsspec.cached")
26
27
 
27
28
 
29
+ class WriteCachedTransaction(Transaction):
30
+ def complete(self, commit=True):
31
+ rpaths = [f.path for f in self.files]
32
+ lpaths = [f.fn for f in self.files]
33
+ if commit:
34
+ self.fs.put(lpaths, rpaths)
35
+ # else remove?
36
+ self.fs._intrans = False
37
+
38
+
28
39
  class CachingFileSystem(AbstractFileSystem):
29
40
  """Locally caching filesystem, layer over any other FS
30
41
 
@@ -355,6 +366,9 @@ class CachingFileSystem(AbstractFileSystem):
355
366
  self.save_cache()
356
367
  return f
357
368
 
369
+ def _parent(self, path):
370
+ return self.fs._parent(path)
371
+
358
372
  def hash_name(self, path: str, *args: Any) -> str:
359
373
  # Kept for backward compatibility with downstream libraries.
360
374
  # Ignores extra arguments, previously same_name boolean.
@@ -390,6 +404,7 @@ class CachingFileSystem(AbstractFileSystem):
390
404
  "open",
391
405
  "cat",
392
406
  "cat_file",
407
+ "cat_ranges",
393
408
  "get",
394
409
  "read_block",
395
410
  "tail",
@@ -411,6 +426,10 @@ class CachingFileSystem(AbstractFileSystem):
411
426
  "__eq__",
412
427
  "to_json",
413
428
  "cache_size",
429
+ "pipe_file",
430
+ "pipe",
431
+ "start_transaction",
432
+ "end_transaction",
414
433
  ]:
415
434
  # all the methods defined in this class. Note `open` here, since
416
435
  # it calls `_open`, but is actually in superclass
@@ -419,7 +438,10 @@ class CachingFileSystem(AbstractFileSystem):
419
438
  )
420
439
  if item in ["__reduce_ex__"]:
421
440
  raise AttributeError
422
- if item in ["_cache"]:
441
+ if item in ["transaction"]:
442
+ # property
443
+ return type(self).transaction.__get__(self)
444
+ if item in ["_cache", "transaction_type"]:
423
445
  # class attributes
424
446
  return getattr(type(self), item)
425
447
  if item == "__class__":
@@ -508,7 +530,13 @@ class WholeFileCacheFileSystem(CachingFileSystem):
508
530
  self._mkcache()
509
531
  else:
510
532
  return [
511
- LocalTempFile(self.fs, path, mode=open_files.mode) for path in paths
533
+ LocalTempFile(
534
+ self.fs,
535
+ path,
536
+ mode=open_files.mode,
537
+ fn=os.path.join(self.storage[-1], self._mapper(path)),
538
+ )
539
+ for path in paths
512
540
  ]
513
541
 
514
542
  if self.compression:
@@ -621,7 +649,8 @@ class WholeFileCacheFileSystem(CachingFileSystem):
621
649
  def _open(self, path, mode="rb", **kwargs):
622
650
  path = self._strip_protocol(path)
623
651
  if "r" not in mode:
624
- return LocalTempFile(self, path, mode=mode)
652
+ fn = self._make_local_details(path)
653
+ return LocalTempFile(self, path, mode=mode, fn=fn)
625
654
  detail = self._check_file(path)
626
655
  if detail:
627
656
  detail, fn = detail
@@ -688,6 +717,7 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
688
717
 
689
718
  protocol = "simplecache"
690
719
  local_file = True
720
+ transaction_type = WriteCachedTransaction
691
721
 
692
722
  def __init__(self, **kwargs):
693
723
  kw = kwargs.copy()
@@ -712,16 +742,46 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
712
742
  def load_cache(self):
713
743
  pass
714
744
 
745
+ def pipe_file(self, path, value=None, **kwargs):
746
+ if self._intrans:
747
+ with self.open(path, "wb") as f:
748
+ f.write(value)
749
+ else:
750
+ super().pipe_file(path, value)
751
+
752
+ def pipe(self, path, value=None, **kwargs):
753
+ if isinstance(path, str):
754
+ self.pipe_file(self._strip_protocol(path), value, **kwargs)
755
+ elif isinstance(path, dict):
756
+ for k, v in path.items():
757
+ self.pipe_file(self._strip_protocol(k), v, **kwargs)
758
+ else:
759
+ raise ValueError("path must be str or dict")
760
+
761
+ def cat_ranges(
762
+ self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
763
+ ):
764
+ lpaths = [self._check_file(p) for p in paths]
765
+ rpaths = [p for l, p in zip(lpaths, paths) if l is False]
766
+ lpaths = [l for l, p in zip(lpaths, paths) if l is False]
767
+ self.fs.get(rpaths, lpaths)
768
+ return super().cat_ranges(
769
+ paths, starts, ends, max_gap=max_gap, on_error=on_error, **kwargs
770
+ )
771
+
715
772
  def _open(self, path, mode="rb", **kwargs):
716
773
  path = self._strip_protocol(path)
774
+ sha = self._mapper(path)
717
775
 
718
776
  if "r" not in mode:
719
- return LocalTempFile(self, path, mode=mode)
777
+ fn = os.path.join(self.storage[-1], sha)
778
+ return LocalTempFile(
779
+ self, path, mode=mode, autocommit=not self._intrans, fn=fn
780
+ )
720
781
  fn = self._check_file(path)
721
782
  if fn:
722
783
  return open(fn, mode)
723
784
 
724
- sha = self._mapper(path)
725
785
  fn = os.path.join(self.storage[-1], sha)
726
786
  logger.debug("Copying %s to local cache", path)
727
787
  kwargs["mode"] = mode
@@ -752,13 +812,9 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
752
812
  class LocalTempFile:
753
813
  """A temporary local file, which will be uploaded on commit"""
754
814
 
755
- def __init__(self, fs, path, fn=None, mode="wb", autocommit=True, seek=0):
756
- if fn:
757
- self.fn = fn
758
- self.fh = open(fn, mode)
759
- else:
760
- fd, self.fn = tempfile.mkstemp()
761
- self.fh = open(fd, mode)
815
+ def __init__(self, fs, path, fn, mode="wb", autocommit=True, seek=0):
816
+ self.fn = fn
817
+ self.fh = open(fn, mode)
762
818
  self.mode = mode
763
819
  if seek:
764
820
  self.fh.seek(seek)
@@ -768,10 +824,10 @@ class LocalTempFile:
768
824
  self.autocommit = autocommit
769
825
 
770
826
  def __reduce__(self):
771
- # always open in rb+ to allow continuing writing at a location
827
+ # always open in r+b to allow continuing writing at a location
772
828
  return (
773
829
  LocalTempFile,
774
- (self.fs, self.path, self.fn, "rb+", self.autocommit, self.tell()),
830
+ (self.fs, self.path, self.fn, "r+b", self.autocommit, self.tell()),
775
831
  )
776
832
 
777
833
  def __enter__(self):
@@ -0,0 +1,48 @@
1
+ import base64
2
+ import io
3
+ from urllib.parse import unquote
4
+
5
+ from fsspec import AbstractFileSystem
6
+
7
+
8
+ class DataFileSystem(AbstractFileSystem):
9
+ """A handy decoder for data-URLs
10
+
11
+ Example
12
+ -------
13
+ >>> with fsspec.open("data:,Hello%2C%20World%21") as f:
14
+ ... print(f.read())
15
+ b"Hello, World!"
16
+
17
+ """
18
+
19
+ protocol = "data"
20
+
21
+ def __init__(self, **kwargs):
22
+ """No parameters for this filesystem"""
23
+ super().__init__(**kwargs)
24
+
25
+ def cat_file(self, path, start=None, end=None, **kwargs):
26
+ pref, data = path.split(",", 1)
27
+ if pref.endswith("base64"):
28
+ return base64.b64decode(data)[start:end]
29
+ return unquote(data).encode()[start:end]
30
+
31
+ def info(self, path, **kwargs):
32
+ pref, name = path.split(",", 1)
33
+ data = self.cat_file(path)
34
+ mime = pref.split(":", 1)[1].split(";", 1)[0]
35
+ return {"name": name, "size": len(data), "type": "file", "mimetype": mime}
36
+
37
+ def _open(
38
+ self,
39
+ path,
40
+ mode="rb",
41
+ block_size=None,
42
+ autocommit=True,
43
+ cache_options=None,
44
+ **kwargs,
45
+ ):
46
+ if "r" not in mode:
47
+ raise ValueError("Read only filesystem")
48
+ return io.BytesIO(self.cat_file(path))
@@ -14,7 +14,13 @@ from fsspec.asyn import AbstractAsyncStreamedFile, AsyncFileSystem, sync, sync_w
14
14
  from fsspec.callbacks import _DEFAULT_CALLBACK
15
15
  from fsspec.exceptions import FSTimeoutError
16
16
  from fsspec.spec import AbstractBufferedFile
17
- from fsspec.utils import DEFAULT_BLOCK_SIZE, isfilelike, nullcontext, tokenize
17
+ from fsspec.utils import (
18
+ DEFAULT_BLOCK_SIZE,
19
+ glob_translate,
20
+ isfilelike,
21
+ nullcontext,
22
+ tokenize,
23
+ )
18
24
 
19
25
  from ..caching import AllBytes
20
26
 
@@ -441,8 +447,9 @@ class HTTPFileSystem(AsyncFileSystem):
441
447
  raise ValueError("maxdepth must be at least 1")
442
448
  import re
443
449
 
444
- ends = path.endswith("/")
450
+ ends_with_slash = path.endswith("/") # _strip_protocol strips trailing slash
445
451
  path = self._strip_protocol(path)
452
+ append_slash_to_dirname = ends_with_slash or path.endswith("/**")
446
453
  idx_star = path.find("*") if path.find("*") >= 0 else len(path)
447
454
  idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
448
455
 
@@ -451,11 +458,11 @@ class HTTPFileSystem(AsyncFileSystem):
451
458
  detail = kwargs.pop("detail", False)
452
459
 
453
460
  if not has_magic(path):
454
- if await self._exists(path):
461
+ if await self._exists(path, **kwargs):
455
462
  if not detail:
456
463
  return [path]
457
464
  else:
458
- return {path: await self._info(path)}
465
+ return {path: await self._info(path, **kwargs)}
459
466
  else:
460
467
  if not detail:
461
468
  return [] # glob of non-existent returns empty
@@ -480,45 +487,22 @@ class HTTPFileSystem(AsyncFileSystem):
480
487
  allpaths = await self._find(
481
488
  root, maxdepth=depth, withdirs=True, detail=True, **kwargs
482
489
  )
483
- # Escape characters special to python regex, leaving our supported
484
- # special characters in place.
485
- # See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
486
- # for shell globbing details.
487
- pattern = (
488
- "^"
489
- + (
490
- path.replace("\\", r"\\")
491
- .replace(".", r"\.")
492
- .replace("+", r"\+")
493
- .replace("//", "/")
494
- .replace("(", r"\(")
495
- .replace(")", r"\)")
496
- .replace("|", r"\|")
497
- .replace("^", r"\^")
498
- .replace("$", r"\$")
499
- .replace("{", r"\{")
500
- .replace("}", r"\}")
501
- .rstrip("/")
502
- )
503
- + "$"
504
- )
505
- pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern)
506
- pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern)
507
- pattern = re.sub("[*]", "[^/]*", pattern)
508
- pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern)
509
- pattern = re.sub("=DOUBLE_STARS=", ".*", pattern)
490
+
491
+ pattern = glob_translate(path + ("/" if ends_with_slash else ""))
510
492
  pattern = re.compile(pattern)
493
+
511
494
  out = {
512
- p: allpaths[p]
513
- for p in sorted(allpaths)
514
- if pattern.match(p.replace("//", "/").rstrip("/"))
495
+ p: info
496
+ for p, info in sorted(allpaths.items())
497
+ if pattern.match(
498
+ (
499
+ p + "/"
500
+ if append_slash_to_dirname and info["type"] == "directory"
501
+ else p
502
+ )
503
+ )
515
504
  }
516
505
 
517
- # Return directories only when the glob end by a slash
518
- # This is needed for posix glob compliance
519
- if ends:
520
- out = {k: v for k, v in out.items() if v["type"] == "directory"}
521
-
522
506
  if detail:
523
507
  return out
524
508
  else:
@@ -818,7 +802,7 @@ async def get_range(session, url, start, end, file=None, **kwargs):
818
802
  async with r:
819
803
  out = await r.read()
820
804
  if file:
821
- with open(file, "rb+") as f:
805
+ with open(file, "r+b") as f:
822
806
  f.seek(start)
823
807
  f.write(out)
824
808
  else:
@@ -855,7 +839,10 @@ async def _file_info(url, session, size_policy="head", **kwargs):
855
839
  if "Content-Length" in r.headers:
856
840
  # Some servers may choose to ignore Accept-Encoding and return
857
841
  # compressed content, in which case the returned size is unreliable.
858
- if r.headers.get("Content-Encoding", "identity") == "identity":
842
+ if "Content-Encoding" not in r.headers or r.headers["Content-Encoding"] in [
843
+ "identity",
844
+ "",
845
+ ]:
859
846
  info["size"] = int(r.headers["Content-Length"])
860
847
  elif "Content-Range" in r.headers:
861
848
  info["size"] = int(r.headers["Content-Range"].split("/")[1])
@@ -175,7 +175,7 @@ class MemoryFileSystem(AbstractFileSystem):
175
175
  parent = self._parent(parent)
176
176
  if self.isfile(parent):
177
177
  raise FileExistsError(parent)
178
- if mode in ["rb", "ab", "rb+"]:
178
+ if mode in ["rb", "ab", "r+b"]:
179
179
  if path in self.store:
180
180
  f = self.store[path]
181
181
  if mode == "ab":