fsspec 2023.9.2__py3-none-any.whl → 2023.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fsspec/spec.py CHANGED
@@ -17,6 +17,7 @@ from .dircache import DirCache
17
17
  from .transaction import Transaction
18
18
  from .utils import (
19
19
  _unstrip_protocol,
20
+ glob_translate,
20
21
  isfilelike,
21
22
  other_paths,
22
23
  read_block,
@@ -109,6 +110,7 @@ class AbstractFileSystem(metaclass=_Cached):
109
110
  async_impl = False
110
111
  mirror_sync_methods = False
111
112
  root_marker = "" # For some FSs, may require leading '/' or other character
113
+ transaction_type = Transaction
112
114
 
113
115
  #: Extra *class attributes* that should be considered when hashing.
114
116
  _extra_tokenize_attributes = ()
@@ -196,7 +198,7 @@ class AbstractFileSystem(metaclass=_Cached):
196
198
  # use of root_marker to make minimum required path, e.g., "/"
197
199
  return path or cls.root_marker
198
200
 
199
- def unstrip_protocol(self, name):
201
+ def unstrip_protocol(self, name: str) -> str:
200
202
  """Format FS-specific path to generic, including protocol"""
201
203
  protos = (self.protocol,) if isinstance(self.protocol, str) else self.protocol
202
204
  for protocol in protos:
@@ -235,20 +237,20 @@ class AbstractFileSystem(metaclass=_Cached):
235
237
  for the normal and exception cases.
236
238
  """
237
239
  if self._transaction is None:
238
- self._transaction = Transaction(self)
240
+ self._transaction = self.transaction_type(self)
239
241
  return self._transaction
240
242
 
241
243
  def start_transaction(self):
242
244
  """Begin write transaction for deferring files, non-context version"""
243
245
  self._intrans = True
244
- self._transaction = Transaction(self)
246
+ self._transaction = self.transaction_type(self)
245
247
  return self.transaction
246
248
 
247
249
  def end_transaction(self):
248
250
  """Finish write transaction, non-context version"""
249
251
  self.transaction.complete()
250
252
  self._transaction = None
251
- # The invalid cache must be cleared after the transcation is completed.
253
+ # The invalid cache must be cleared after the transaction is completed.
252
254
  for path in self._invalidated_caches_in_transaction:
253
255
  self.invalidate_cache(path)
254
256
  self._invalidated_caches_in_transaction.clear()
@@ -551,10 +553,6 @@ class AbstractFileSystem(metaclass=_Cached):
551
553
 
552
554
  The `maxdepth` option is applied on the first `**` found in the path.
553
555
 
554
- Search path names that contain embedded characters special to this
555
- implementation of glob may not produce expected results;
556
- e.g., 'foo/bar/*starredfilename*'.
557
-
558
556
  kwargs are passed to ``ls``.
559
557
  """
560
558
  if maxdepth is not None and maxdepth < 1:
@@ -562,8 +560,12 @@ class AbstractFileSystem(metaclass=_Cached):
562
560
 
563
561
  import re
564
562
 
565
- ends = path.endswith("/")
563
+ seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
564
+ ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash
566
565
  path = self._strip_protocol(path)
566
+ append_slash_to_dirname = ends_with_sep or path.endswith(
567
+ tuple(sep + "**" for sep in seps)
568
+ )
567
569
  idx_star = path.find("*") if path.find("*") >= 0 else len(path)
568
570
  idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
569
571
  idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
@@ -573,11 +575,11 @@ class AbstractFileSystem(metaclass=_Cached):
573
575
  detail = kwargs.pop("detail", False)
574
576
 
575
577
  if not has_magic(path):
576
- if self.exists(path):
578
+ if self.exists(path, **kwargs):
577
579
  if not detail:
578
580
  return [path]
579
581
  else:
580
- return {path: self.info(path)}
582
+ return {path: self.info(path, **kwargs)}
581
583
  else:
582
584
  if not detail:
583
585
  return [] # glob of non-existent returns empty
@@ -600,47 +602,22 @@ class AbstractFileSystem(metaclass=_Cached):
600
602
  depth = None
601
603
 
602
604
  allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)
603
- # Escape characters special to python regex, leaving our supported
604
- # special characters in place.
605
- # See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
606
- # for shell globbing details.
607
- pattern = (
608
- "^"
609
- + (
610
- path.replace("\\", r"\\")
611
- .replace(".", r"\.")
612
- .replace("+", r"\+")
613
- .replace("//", "/")
614
- .replace("(", r"\(")
615
- .replace(")", r"\)")
616
- .replace("|", r"\|")
617
- .replace("^", r"\^")
618
- .replace("$", r"\$")
619
- .replace("{", r"\{")
620
- .replace("}", r"\}")
621
- .rstrip("/")
622
- .replace("?", ".")
623
- )
624
- + "$"
625
- )
626
- pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern)
627
- pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern)
628
- pattern = re.sub("[*]", "[^/]*", pattern)
629
- pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern)
630
- pattern = re.sub("=DOUBLE_STARS=", ".*", pattern)
605
+
606
+ pattern = glob_translate(path + ("/" if ends_with_sep else ""))
631
607
  pattern = re.compile(pattern)
632
608
 
633
609
  out = {
634
- p: allpaths[p]
635
- for p in sorted(allpaths)
636
- if pattern.match(p.replace("//", "/").rstrip("/"))
610
+ p: info
611
+ for p, info in sorted(allpaths.items())
612
+ if pattern.match(
613
+ (
614
+ p + "/"
615
+ if append_slash_to_dirname and info["type"] == "directory"
616
+ else p
617
+ )
618
+ )
637
619
  }
638
620
 
639
- # Return directories only when the glob end by a slash
640
- # This is needed for posix glob compliance
641
- if ends:
642
- out = {k: v for k, v in out.items() if v["type"] == "directory"}
643
-
644
621
  if detail:
645
622
  return out
646
623
  else:
@@ -828,6 +805,16 @@ class AbstractFileSystem(metaclass=_Cached):
828
805
  def cat_ranges(
829
806
  self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
830
807
  ):
808
+ """Get the contents of byte ranges from one or more files
809
+
810
+ Parameters
811
+ ----------
812
+ paths: list
813
+ A list of of filepaths on this filesystems
814
+ starts, ends: int or list
815
+ Bytes limits of the read. If using a single int, the same value will be
816
+ used to read all the specified files.
817
+ """
831
818
  if max_gap is not None:
832
819
  raise NotImplementedError
833
820
  if not isinstance(paths, list):
@@ -835,7 +822,7 @@ class AbstractFileSystem(metaclass=_Cached):
835
822
  if not isinstance(starts, list):
836
823
  starts = [starts] * len(paths)
837
824
  if not isinstance(ends, list):
838
- ends = [starts] * len(paths)
825
+ ends = [ends] * len(paths)
839
826
  if len(starts) != len(paths) or len(ends) != len(paths):
840
827
  raise ValueError
841
828
  out = []
@@ -901,7 +888,8 @@ class AbstractFileSystem(metaclass=_Cached):
901
888
  os.makedirs(lpath, exist_ok=True)
902
889
  return None
903
890
 
904
- LocalFileSystem(auto_mkdir=True).makedirs(self._parent(lpath), exist_ok=True)
891
+ fs = LocalFileSystem(auto_mkdir=True)
892
+ fs.makedirs(fs._parent(lpath), exist_ok=True)
905
893
 
906
894
  with self.open(rpath, "rb", **kwargs) as f1:
907
895
  if outfile is None:
@@ -1187,9 +1175,7 @@ class AbstractFileSystem(metaclass=_Cached):
1187
1175
  def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs):
1188
1176
  """Move file(s) from one location to another"""
1189
1177
  if path1 == path2:
1190
- logger.debug(
1191
- "%s mv: The paths are the same, so no files were moved." % (self)
1192
- )
1178
+ logger.debug("%s mv: The paths are the same, so no files were moved.", self)
1193
1179
  else:
1194
1180
  self.copy(path1, path2, recursive=recursive, maxdepth=maxdepth)
1195
1181
  self.rm(path1, recursive=recursive)
@@ -1744,7 +1730,7 @@ class AbstractBufferedFile(io.IOBase):
1744
1730
  elif whence == 2:
1745
1731
  nloc = self.size + loc
1746
1732
  else:
1747
- raise ValueError("invalid whence (%s, should be 0, 1 or 2)" % whence)
1733
+ raise ValueError(f"invalid whence ({whence}, should be 0, 1 or 2)")
1748
1734
  if nloc < 0:
1749
1735
  raise ValueError("Seek before start of file")
1750
1736
  self.loc = nloc
@@ -1851,7 +1837,7 @@ class AbstractBufferedFile(io.IOBase):
1851
1837
  length = self.size - self.loc
1852
1838
  if self.closed:
1853
1839
  raise ValueError("I/O operation on closed file.")
1854
- logger.debug("%s read: %i - %i" % (self, self.loc, self.loc + length))
1840
+ logger.debug("%s read: %i - %i", self, self.loc, self.loc + length)
1855
1841
  if length == 0:
1856
1842
  # don't even bother calling fetch
1857
1843
  return b""
@@ -1966,7 +1952,7 @@ class AbstractBufferedFile(io.IOBase):
1966
1952
  self.close()
1967
1953
 
1968
1954
  def __str__(self):
1969
- return "<File-like object %s, %s>" % (type(self.fs).__name__, self.path)
1955
+ return f"<File-like object {type(self.fs).__name__}, {self.path}>"
1970
1956
 
1971
1957
  __repr__ = __str__
1972
1958
 
@@ -107,9 +107,9 @@ GLOB_EDGE_CASES_TESTS = {
107
107
  "subdir1/subfile2",
108
108
  ],
109
109
  ),
110
- ("**1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
110
+ ("**/*1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
111
111
  (
112
- "**1",
112
+ "**/*1",
113
113
  True,
114
114
  None,
115
115
  [
@@ -120,14 +120,14 @@ GLOB_EDGE_CASES_TESTS = {
120
120
  "subdir1/nesteddir/nestedfile",
121
121
  ],
122
122
  ),
123
- ("**1", True, 1, ["file1"]),
123
+ ("**/*1", True, 1, ["file1"]),
124
124
  (
125
- "**1",
125
+ "**/*1",
126
126
  True,
127
127
  2,
128
128
  ["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"],
129
129
  ),
130
- ("**1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
130
+ ("**/*1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
131
131
  ("**/subdir0", False, None, []),
132
132
  ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
133
133
  ("**/subdir0/nested*", False, 2, []),
fsspec/transaction.py CHANGED
@@ -1,3 +1,6 @@
1
+ from collections import deque
2
+
3
+
1
4
  class Transaction:
2
5
  """Filesystem transaction write context
3
6
 
@@ -13,10 +16,11 @@ class Transaction:
13
16
  fs: FileSystem instance
14
17
  """
15
18
  self.fs = fs
16
- self.files = []
19
+ self.files = deque()
17
20
 
18
21
  def __enter__(self):
19
22
  self.start()
23
+ return self
20
24
 
21
25
  def __exit__(self, exc_type, exc_val, exc_tb):
22
26
  """End transaction and commit, if exit is not due to exception"""
@@ -27,17 +31,17 @@ class Transaction:
27
31
 
28
32
  def start(self):
29
33
  """Start a transaction on this FileSystem"""
30
- self.files = [] # clean up after previous failed completions
34
+ self.files = deque() # clean up after previous failed completions
31
35
  self.fs._intrans = True
32
36
 
33
37
  def complete(self, commit=True):
34
38
  """Finish transaction: commit or discard all deferred files"""
35
- for f in self.files:
39
+ while self.files:
40
+ f = self.files.popleft()
36
41
  if commit:
37
42
  f.commit()
38
43
  else:
39
44
  f.discard()
40
- self.files = []
41
45
  self.fs._intrans = False
42
46
 
43
47