fsspec 2023.10.0__py3-none-any.whl → 2024.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. fsspec/_version.py +3 -3
  2. fsspec/archive.py +4 -4
  3. fsspec/asyn.py +43 -53
  4. fsspec/caching.py +1 -1
  5. fsspec/callbacks.py +98 -12
  6. fsspec/compression.py +3 -3
  7. fsspec/core.py +16 -3
  8. fsspec/exceptions.py +0 -4
  9. fsspec/generic.py +11 -4
  10. fsspec/gui.py +4 -3
  11. fsspec/implementations/arrow.py +9 -0
  12. fsspec/implementations/cache_mapper.py +2 -6
  13. fsspec/implementations/cached.py +92 -18
  14. fsspec/implementations/data.py +48 -0
  15. fsspec/implementations/dbfs.py +14 -4
  16. fsspec/implementations/dirfs.py +6 -0
  17. fsspec/implementations/ftp.py +18 -13
  18. fsspec/implementations/github.py +17 -5
  19. fsspec/implementations/http.py +42 -51
  20. fsspec/implementations/libarchive.py +2 -3
  21. fsspec/implementations/local.py +11 -4
  22. fsspec/implementations/memory.py +2 -2
  23. fsspec/implementations/reference.py +127 -56
  24. fsspec/implementations/sftp.py +6 -5
  25. fsspec/implementations/smb.py +0 -1
  26. fsspec/implementations/tar.py +2 -1
  27. fsspec/implementations/webhdfs.py +46 -5
  28. fsspec/implementations/zip.py +11 -3
  29. fsspec/parquet.py +3 -5
  30. fsspec/registry.py +2 -1
  31. fsspec/spec.py +51 -61
  32. fsspec/tests/abstract/common.py +5 -5
  33. fsspec/tests/abstract/copy.py +21 -7
  34. fsspec/tests/abstract/put.py +21 -7
  35. fsspec/transaction.py +8 -4
  36. fsspec/utils.py +114 -1
  37. {fsspec-2023.10.0.dist-info → fsspec-2024.2.0.dist-info}/METADATA +1 -2
  38. fsspec-2024.2.0.dist-info/RECORD +54 -0
  39. {fsspec-2023.10.0.dist-info → fsspec-2024.2.0.dist-info}/WHEEL +1 -1
  40. fsspec-2023.10.0.dist-info/RECORD +0 -53
  41. {fsspec-2023.10.0.dist-info → fsspec-2024.2.0.dist-info}/LICENSE +0 -0
  42. {fsspec-2023.10.0.dist-info → fsspec-2024.2.0.dist-info}/top_level.txt +0 -0
fsspec/parquet.py CHANGED
@@ -131,10 +131,8 @@ def open_parquet_file(
131
131
  cache_type="parts",
132
132
  cache_options={
133
133
  **options,
134
- **{
135
- "data": data.get(fn, {}),
136
- "strict": strict,
137
- },
134
+ "data": data.get(fn, {}),
135
+ "strict": strict,
138
136
  },
139
137
  **kwargs,
140
138
  )
@@ -338,7 +336,7 @@ def _transfer_ranges(fs, blocks, paths, starts, ends):
338
336
 
339
337
  def _add_header_magic(data):
340
338
  # Add b"PAR1" to file headers
341
- for i, path in enumerate(list(data.keys())):
339
+ for path in list(data.keys()):
342
340
  add_magic = True
343
341
  for k in data[path].keys():
344
342
  if k[0] == 0 and k[1] >= 4:
fsspec/registry.py CHANGED
@@ -57,9 +57,10 @@ def register_implementation(name, cls, clobber=False, errtxt=None):
57
57
  _registry[name] = cls
58
58
 
59
59
 
60
- # protocols mapped to the class which implements them. This dict can
60
+ # protocols mapped to the class which implements them. This dict can be
61
61
  # updated with register_implementation
62
62
  known_implementations = {
63
+ "data": {"class": "fsspec.implementations.data.DataFileSystem"},
63
64
  "file": {"class": "fsspec.implementations.local.LocalFileSystem"},
64
65
  "local": {"class": "fsspec.implementations.local.LocalFileSystem"},
65
66
  "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"},
fsspec/spec.py CHANGED
@@ -11,12 +11,13 @@ from glob import has_magic
11
11
  from hashlib import sha256
12
12
  from typing import ClassVar
13
13
 
14
- from .callbacks import _DEFAULT_CALLBACK
14
+ from .callbacks import DEFAULT_CALLBACK
15
15
  from .config import apply_config, conf
16
16
  from .dircache import DirCache
17
17
  from .transaction import Transaction
18
18
  from .utils import (
19
19
  _unstrip_protocol,
20
+ glob_translate,
20
21
  isfilelike,
21
22
  other_paths,
22
23
  read_block,
@@ -109,6 +110,7 @@ class AbstractFileSystem(metaclass=_Cached):
109
110
  async_impl = False
110
111
  mirror_sync_methods = False
111
112
  root_marker = "" # For some FSs, may require leading '/' or other character
113
+ transaction_type = Transaction
112
114
 
113
115
  #: Extra *class attributes* that should be considered when hashing.
114
116
  _extra_tokenize_attributes = ()
@@ -235,20 +237,20 @@ class AbstractFileSystem(metaclass=_Cached):
235
237
  for the normal and exception cases.
236
238
  """
237
239
  if self._transaction is None:
238
- self._transaction = Transaction(self)
240
+ self._transaction = self.transaction_type(self)
239
241
  return self._transaction
240
242
 
241
243
  def start_transaction(self):
242
244
  """Begin write transaction for deferring files, non-context version"""
243
245
  self._intrans = True
244
- self._transaction = Transaction(self)
246
+ self._transaction = self.transaction_type(self)
245
247
  return self.transaction
246
248
 
247
249
  def end_transaction(self):
248
250
  """Finish write transaction, non-context version"""
249
251
  self.transaction.complete()
250
252
  self._transaction = None
251
- # The invalid cache must be cleared after the transcation is completed.
253
+ # The invalid cache must be cleared after the transaction is completed.
252
254
  for path in self._invalidated_caches_in_transaction:
253
255
  self.invalidate_cache(path)
254
256
  self._invalidated_caches_in_transaction.clear()
@@ -551,10 +553,6 @@ class AbstractFileSystem(metaclass=_Cached):
551
553
 
552
554
  The `maxdepth` option is applied on the first `**` found in the path.
553
555
 
554
- Search path names that contain embedded characters special to this
555
- implementation of glob may not produce expected results;
556
- e.g., ``foo/bar/*starredfilename*``.
557
-
558
556
  kwargs are passed to ``ls``.
559
557
  """
560
558
  if maxdepth is not None and maxdepth < 1:
@@ -562,8 +560,12 @@ class AbstractFileSystem(metaclass=_Cached):
562
560
 
563
561
  import re
564
562
 
565
- ends = path.endswith("/")
563
+ seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
564
+ ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash
566
565
  path = self._strip_protocol(path)
566
+ append_slash_to_dirname = ends_with_sep or path.endswith(
567
+ tuple(sep + "**" for sep in seps)
568
+ )
567
569
  idx_star = path.find("*") if path.find("*") >= 0 else len(path)
568
570
  idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
569
571
  idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
@@ -573,11 +575,11 @@ class AbstractFileSystem(metaclass=_Cached):
573
575
  detail = kwargs.pop("detail", False)
574
576
 
575
577
  if not has_magic(path):
576
- if self.exists(path):
578
+ if self.exists(path, **kwargs):
577
579
  if not detail:
578
580
  return [path]
579
581
  else:
580
- return {path: self.info(path)}
582
+ return {path: self.info(path, **kwargs)}
581
583
  else:
582
584
  if not detail:
583
585
  return [] # glob of non-existent returns empty
@@ -600,47 +602,22 @@ class AbstractFileSystem(metaclass=_Cached):
600
602
  depth = None
601
603
 
602
604
  allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)
603
- # Escape characters special to python regex, leaving our supported
604
- # special characters in place.
605
- # See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
606
- # for shell globbing details.
607
- pattern = (
608
- "^"
609
- + (
610
- path.replace("\\", r"\\")
611
- .replace(".", r"\.")
612
- .replace("+", r"\+")
613
- .replace("//", "/")
614
- .replace("(", r"\(")
615
- .replace(")", r"\)")
616
- .replace("|", r"\|")
617
- .replace("^", r"\^")
618
- .replace("$", r"\$")
619
- .replace("{", r"\{")
620
- .replace("}", r"\}")
621
- .rstrip("/")
622
- .replace("?", ".")
623
- )
624
- + "$"
625
- )
626
- pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern)
627
- pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern)
628
- pattern = re.sub("[*]", "[^/]*", pattern)
629
- pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern)
630
- pattern = re.sub("=DOUBLE_STARS=", ".*", pattern)
605
+
606
+ pattern = glob_translate(path + ("/" if ends_with_sep else ""))
631
607
  pattern = re.compile(pattern)
632
608
 
633
609
  out = {
634
- p: allpaths[p]
635
- for p in sorted(allpaths)
636
- if pattern.match(p.replace("//", "/").rstrip("/"))
610
+ p: info
611
+ for p, info in sorted(allpaths.items())
612
+ if pattern.match(
613
+ (
614
+ p + "/"
615
+ if append_slash_to_dirname and info["type"] == "directory"
616
+ else p
617
+ )
618
+ )
637
619
  }
638
620
 
639
- # Return directories only when the glob end by a slash
640
- # This is needed for posix glob compliance
641
- if ends:
642
- out = {k: v for k, v in out.items() if v["type"] == "directory"}
643
-
644
621
  if detail:
645
622
  return out
646
623
  else:
@@ -828,6 +805,16 @@ class AbstractFileSystem(metaclass=_Cached):
828
805
  def cat_ranges(
829
806
  self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
830
807
  ):
808
+ """Get the contents of byte ranges from one or more files
809
+
810
+ Parameters
811
+ ----------
812
+ paths: list
813
+ A list of filepaths on this filesystem
814
+ starts, ends: int or list
815
+ Bytes limits of the read. If using a single int, the same value will be
816
+ used to read all the specified files.
817
+ """
831
818
  if max_gap is not None:
832
819
  raise NotImplementedError
833
820
  if not isinstance(paths, list):
@@ -835,7 +822,7 @@ class AbstractFileSystem(metaclass=_Cached):
835
822
  if not isinstance(starts, list):
836
823
  starts = [starts] * len(paths)
837
824
  if not isinstance(ends, list):
838
- ends = [starts] * len(paths)
825
+ ends = [ends] * len(paths)
839
826
  if len(starts) != len(paths) or len(ends) != len(paths):
840
827
  raise ValueError
841
828
  out = []
@@ -889,9 +876,7 @@ class AbstractFileSystem(metaclass=_Cached):
889
876
  else:
890
877
  return self.cat_file(paths[0], **kwargs)
891
878
 
892
- def get_file(
893
- self, rpath, lpath, callback=_DEFAULT_CALLBACK, outfile=None, **kwargs
894
- ):
879
+ def get_file(self, rpath, lpath, callback=DEFAULT_CALLBACK, outfile=None, **kwargs):
895
880
  """Copy single remote file to local"""
896
881
  from .implementations.local import LocalFileSystem
897
882
 
@@ -901,7 +886,8 @@ class AbstractFileSystem(metaclass=_Cached):
901
886
  os.makedirs(lpath, exist_ok=True)
902
887
  return None
903
888
 
904
- LocalFileSystem(auto_mkdir=True).makedirs(self._parent(lpath), exist_ok=True)
889
+ fs = LocalFileSystem(auto_mkdir=True)
890
+ fs.makedirs(fs._parent(lpath), exist_ok=True)
905
891
 
906
892
  with self.open(rpath, "rb", **kwargs) as f1:
907
893
  if outfile is None:
@@ -925,7 +911,7 @@ class AbstractFileSystem(metaclass=_Cached):
925
911
  rpath,
926
912
  lpath,
927
913
  recursive=False,
928
- callback=_DEFAULT_CALLBACK,
914
+ callback=DEFAULT_CALLBACK,
929
915
  maxdepth=None,
930
916
  **kwargs,
931
917
  ):
@@ -979,10 +965,10 @@ class AbstractFileSystem(metaclass=_Cached):
979
965
 
980
966
  callback.set_size(len(lpaths))
981
967
  for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
982
- callback.branch(rpath, lpath, kwargs)
983
- self.get_file(rpath, lpath, **kwargs)
968
+ with callback.branched(rpath, lpath) as child:
969
+ self.get_file(rpath, lpath, callback=child, **kwargs)
984
970
 
985
- def put_file(self, lpath, rpath, callback=_DEFAULT_CALLBACK, **kwargs):
971
+ def put_file(self, lpath, rpath, callback=DEFAULT_CALLBACK, **kwargs):
986
972
  """Copy single file to remote"""
987
973
  if os.path.isdir(lpath):
988
974
  self.makedirs(rpath, exist_ok=True)
@@ -1007,7 +993,7 @@ class AbstractFileSystem(metaclass=_Cached):
1007
993
  lpath,
1008
994
  rpath,
1009
995
  recursive=False,
1010
- callback=_DEFAULT_CALLBACK,
996
+ callback=DEFAULT_CALLBACK,
1011
997
  maxdepth=None,
1012
998
  **kwargs,
1013
999
  ):
@@ -1065,8 +1051,8 @@ class AbstractFileSystem(metaclass=_Cached):
1065
1051
 
1066
1052
  callback.set_size(len(rpaths))
1067
1053
  for lpath, rpath in callback.wrap(zip(lpaths, rpaths)):
1068
- callback.branch(lpath, rpath, kwargs)
1069
- self.put_file(lpath, rpath, **kwargs)
1054
+ with callback.branched(lpath, rpath) as child:
1055
+ self.put_file(lpath, rpath, callback=child, **kwargs)
1070
1056
 
1071
1057
  def head(self, path, size=1024):
1072
1058
  """Get the first ``size`` bytes from file"""
@@ -1146,7 +1132,7 @@ class AbstractFileSystem(metaclass=_Cached):
1146
1132
  if maxdepth is not None and maxdepth < 1:
1147
1133
  raise ValueError("maxdepth must be at least 1")
1148
1134
 
1149
- if isinstance(path, str):
1135
+ if isinstance(path, (str, os.PathLike)):
1150
1136
  out = self.expand_path([path], recursive, maxdepth)
1151
1137
  else:
1152
1138
  out = set()
@@ -1412,7 +1398,9 @@ class AbstractFileSystem(metaclass=_Cached):
1412
1398
  )
1413
1399
  return json.dumps(
1414
1400
  dict(
1415
- **{"cls": cls, "protocol": proto, "args": self.storage_args},
1401
+ cls=cls,
1402
+ protocol=proto,
1403
+ args=self.storage_args,
1416
1404
  **self.storage_options,
1417
1405
  )
1418
1406
  )
@@ -1703,6 +1691,8 @@ class AbstractBufferedFile(io.IOBase):
1703
1691
 
1704
1692
  def __eq__(self, other):
1705
1693
  """Files are equal if they have the same checksum, only in read mode"""
1694
+ if self is other:
1695
+ return True
1706
1696
  return self.mode == "rb" and other.mode == "rb" and hash(self) == hash(other)
1707
1697
 
1708
1698
  def commit(self):
fsspec/tests/abstract/common.py CHANGED
@@ -107,9 +107,9 @@ GLOB_EDGE_CASES_TESTS = {
107
107
  "subdir1/subfile2",
108
108
  ],
109
109
  ),
110
- ("**1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
110
+ ("**/*1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
111
111
  (
112
- "**1",
112
+ "**/*1",
113
113
  True,
114
114
  None,
115
115
  [
@@ -120,14 +120,14 @@ GLOB_EDGE_CASES_TESTS = {
120
120
  "subdir1/nesteddir/nestedfile",
121
121
  ],
122
122
  ),
123
- ("**1", True, 1, ["file1"]),
123
+ ("**/*1", True, 1, ["file1"]),
124
124
  (
125
- "**1",
125
+ "**/*1",
126
126
  True,
127
127
  2,
128
128
  ["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"],
129
129
  ),
130
- ("**1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
130
+ ("**/*1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
131
131
  ("**/subdir0", False, None, []),
132
132
  ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
133
133
  ("**/subdir0/nested*", False, 2, []),
fsspec/tests/abstract/copy.py CHANGED
@@ -128,7 +128,9 @@ class AbstractCopyTests:
128
128
 
129
129
  # Without recursive does nothing
130
130
  fs.cp(s, t)
131
- assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
131
+ assert fs.ls(target, detail=False) == (
132
+ [] if supports_empty_directories else [dummy]
133
+ )
132
134
 
133
135
  # With recursive
134
136
  fs.cp(s, t, recursive=True)
@@ -155,7 +157,9 @@ class AbstractCopyTests:
155
157
  assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
156
158
 
157
159
  fs.rm(fs_join(target, "subdir"), recursive=True)
158
- assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
160
+ assert fs.ls(target, detail=False) == (
161
+ [] if supports_empty_directories else [dummy]
162
+ )
159
163
 
160
164
  # Limit recursive by maxdepth
161
165
  fs.cp(s, t, recursive=True, maxdepth=1)
@@ -179,7 +183,9 @@ class AbstractCopyTests:
179
183
  assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
180
184
 
181
185
  fs.rm(fs_join(target, "subdir"), recursive=True)
182
- assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
186
+ assert fs.ls(target, detail=False) == (
187
+ [] if supports_empty_directories else [dummy]
188
+ )
183
189
 
184
190
  def test_copy_directory_to_new_directory(
185
191
  self,
@@ -271,7 +277,9 @@ class AbstractCopyTests:
271
277
  ],
272
278
  recursive=True,
273
279
  )
274
- assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
280
+ assert fs.ls(target, detail=False) == (
281
+ [] if supports_empty_directories else [dummy]
282
+ )
275
283
 
276
284
  # With recursive
277
285
  for glob, recursive in zip(["*", "**"], [True, False]):
@@ -290,7 +298,9 @@ class AbstractCopyTests:
290
298
  ],
291
299
  recursive=True,
292
300
  )
293
- assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
301
+ assert fs.ls(target, detail=False) == (
302
+ [] if supports_empty_directories else [dummy]
303
+ )
294
304
 
295
305
  # Limit recursive by maxdepth
296
306
  fs.cp(
@@ -308,7 +318,9 @@ class AbstractCopyTests:
308
318
  ],
309
319
  recursive=True,
310
320
  )
311
- assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
321
+ assert fs.ls(target, detail=False) == (
322
+ [] if supports_empty_directories else [dummy]
323
+ )
312
324
 
313
325
  def test_copy_glob_to_new_directory(
314
326
  self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
@@ -451,7 +463,9 @@ class AbstractCopyTests:
451
463
  ],
452
464
  recursive=True,
453
465
  )
454
- assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
466
+ assert fs.ls(target, detail=False) == (
467
+ [] if supports_empty_directories else [dummy]
468
+ )
455
469
 
456
470
  def test_copy_list_of_files_to_new_directory(
457
471
  self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target
fsspec/tests/abstract/put.py CHANGED
@@ -131,7 +131,9 @@ class AbstractPutTests:
131
131
 
132
132
  # Without recursive does nothing
133
133
  fs.put(s, t)
134
- assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
134
+ assert fs.ls(target, detail=False) == (
135
+ [] if supports_empty_directories else [dummy]
136
+ )
135
137
 
136
138
  # With recursive
137
139
  fs.put(s, t, recursive=True)
@@ -158,7 +160,9 @@ class AbstractPutTests:
158
160
  assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile"))
159
161
 
160
162
  fs.rm(fs_join(target, "subdir"), recursive=True)
161
- assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
163
+ assert fs.ls(target, detail=False) == (
164
+ [] if supports_empty_directories else [dummy]
165
+ )
162
166
 
163
167
  # Limit recursive by maxdepth
164
168
  fs.put(s, t, recursive=True, maxdepth=1)
@@ -182,7 +186,9 @@ class AbstractPutTests:
182
186
  assert not fs.exists(fs_join(target, "subdir", "nesteddir"))
183
187
 
184
188
  fs.rm(fs_join(target, "subdir"), recursive=True)
185
- assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
189
+ assert fs.ls(target, detail=False) == (
190
+ [] if supports_empty_directories else [dummy]
191
+ )
186
192
 
187
193
  def test_put_directory_to_new_directory(
188
194
  self,
@@ -275,7 +281,9 @@ class AbstractPutTests:
275
281
  ],
276
282
  recursive=True,
277
283
  )
278
- assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
284
+ assert fs.ls(target, detail=False) == (
285
+ [] if supports_empty_directories else [dummy]
286
+ )
279
287
 
280
288
  # With recursive
281
289
  for glob, recursive in zip(["*", "**"], [True, False]):
@@ -294,7 +302,9 @@ class AbstractPutTests:
294
302
  ],
295
303
  recursive=True,
296
304
  )
297
- assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
305
+ assert fs.ls(target, detail=False) == (
306
+ [] if supports_empty_directories else [dummy]
307
+ )
298
308
 
299
309
  # Limit recursive by maxdepth
300
310
  fs.put(
@@ -315,7 +325,9 @@ class AbstractPutTests:
315
325
  ],
316
326
  recursive=True,
317
327
  )
318
- assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
328
+ assert fs.ls(target, detail=False) == (
329
+ [] if supports_empty_directories else [dummy]
330
+ )
319
331
 
320
332
  def test_put_glob_to_new_directory(
321
333
  self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
@@ -463,7 +475,9 @@ class AbstractPutTests:
463
475
  ],
464
476
  recursive=True,
465
477
  )
466
- assert fs.ls(target) == ([] if supports_empty_directories else [dummy])
478
+ assert fs.ls(target, detail=False) == (
479
+ [] if supports_empty_directories else [dummy]
480
+ )
467
481
 
468
482
  def test_put_list_of_files_to_new_directory(
469
483
  self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0
fsspec/transaction.py CHANGED
@@ -1,3 +1,6 @@
1
+ from collections import deque
2
+
3
+
1
4
  class Transaction:
2
5
  """Filesystem transaction write context
3
6
 
@@ -13,10 +16,11 @@ class Transaction:
13
16
  fs: FileSystem instance
14
17
  """
15
18
  self.fs = fs
16
- self.files = []
19
+ self.files = deque()
17
20
 
18
21
  def __enter__(self):
19
22
  self.start()
23
+ return self
20
24
 
21
25
  def __exit__(self, exc_type, exc_val, exc_tb):
22
26
  """End transaction and commit, if exit is not due to exception"""
@@ -27,17 +31,17 @@ class Transaction:
27
31
 
28
32
  def start(self):
29
33
  """Start a transaction on this FileSystem"""
30
- self.files = [] # clean up after previous failed completions
34
+ self.files = deque() # clean up after previous failed completions
31
35
  self.fs._intrans = True
32
36
 
33
37
  def complete(self, commit=True):
34
38
  """Finish transaction: commit or discard all deferred files"""
35
- for f in self.files:
39
+ while self.files:
40
+ f = self.files.popleft()
36
41
  if commit:
37
42
  f.commit()
38
43
  else:
39
44
  f.discard()
40
- self.files = []
41
45
  self.fs._intrans = False
42
46
 
43
47
 
fsspec/utils.py CHANGED
@@ -320,7 +320,7 @@ def tokenize(*args: Any, **kwargs: Any) -> str:
320
320
  h = md5(str(args).encode())
321
321
  except ValueError:
322
322
  # FIPS systems: https://github.com/fsspec/filesystem_spec/issues/380
323
- h = md5(str(args).encode(), usedforsecurity=False) # type: ignore[call-arg]
323
+ h = md5(str(args).encode(), usedforsecurity=False)
324
324
  return h.hexdigest()
325
325
 
326
326
 
@@ -436,6 +436,7 @@ def isfilelike(f: Any) -> TypeGuard[IO[bytes]]:
436
436
 
437
437
 
438
438
  def get_protocol(url: str) -> str:
439
+ url = stringify_path(url)
439
440
  parts = re.split(r"(\:\:|\://)", url, 1)
440
441
  if len(parts) > 1:
441
442
  return parts[0]
@@ -627,3 +628,115 @@ def atomic_write(path: str, mode: str = "wb"):
627
628
  raise
628
629
  else:
629
630
  os.replace(fn, path)
631
+
632
+
633
+ def _translate(pat, STAR, QUESTION_MARK):
634
+ # Copied from: https://github.com/python/cpython/pull/106703.
635
+ res: list[str] = []
636
+ add = res.append
637
+ i, n = 0, len(pat)
638
+ while i < n:
639
+ c = pat[i]
640
+ i = i + 1
641
+ if c == "*":
642
+ # compress consecutive `*` into one
643
+ if (not res) or res[-1] is not STAR:
644
+ add(STAR)
645
+ elif c == "?":
646
+ add(QUESTION_MARK)
647
+ elif c == "[":
648
+ j = i
649
+ if j < n and pat[j] == "!":
650
+ j = j + 1
651
+ if j < n and pat[j] == "]":
652
+ j = j + 1
653
+ while j < n and pat[j] != "]":
654
+ j = j + 1
655
+ if j >= n:
656
+ add("\\[")
657
+ else:
658
+ stuff = pat[i:j]
659
+ if "-" not in stuff:
660
+ stuff = stuff.replace("\\", r"\\")
661
+ else:
662
+ chunks = []
663
+ k = i + 2 if pat[i] == "!" else i + 1
664
+ while True:
665
+ k = pat.find("-", k, j)
666
+ if k < 0:
667
+ break
668
+ chunks.append(pat[i:k])
669
+ i = k + 1
670
+ k = k + 3
671
+ chunk = pat[i:j]
672
+ if chunk:
673
+ chunks.append(chunk)
674
+ else:
675
+ chunks[-1] += "-"
676
+ # Remove empty ranges -- invalid in RE.
677
+ for k in range(len(chunks) - 1, 0, -1):
678
+ if chunks[k - 1][-1] > chunks[k][0]:
679
+ chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:]
680
+ del chunks[k]
681
+ # Escape backslashes and hyphens for set difference (--).
682
+ # Hyphens that create ranges shouldn't be escaped.
683
+ stuff = "-".join(
684
+ s.replace("\\", r"\\").replace("-", r"\-") for s in chunks
685
+ )
686
+ # Escape set operations (&&, ~~ and ||).
687
+ stuff = re.sub(r"([&~|])", r"\\\1", stuff)
688
+ i = j + 1
689
+ if not stuff:
690
+ # Empty range: never match.
691
+ add("(?!)")
692
+ elif stuff == "!":
693
+ # Negated empty range: match any character.
694
+ add(".")
695
+ else:
696
+ if stuff[0] == "!":
697
+ stuff = "^" + stuff[1:]
698
+ elif stuff[0] in ("^", "["):
699
+ stuff = "\\" + stuff
700
+ add(f"[{stuff}]")
701
+ else:
702
+ add(re.escape(c))
703
+ assert i == n
704
+ return res
705
+
706
+
707
+ def glob_translate(pat):
708
+ # Copied from: https://github.com/python/cpython/pull/106703.
709
+ # The keyword parameters' values are fixed to:
710
+ # recursive=True, include_hidden=True, seps=None
711
+ """Translate a pathname with shell wildcards to a regular expression."""
712
+ if os.path.altsep:
713
+ seps = os.path.sep + os.path.altsep
714
+ else:
715
+ seps = os.path.sep
716
+ escaped_seps = "".join(map(re.escape, seps))
717
+ any_sep = f"[{escaped_seps}]" if len(seps) > 1 else escaped_seps
718
+ not_sep = f"[^{escaped_seps}]"
719
+ one_last_segment = f"{not_sep}+"
720
+ one_segment = f"{one_last_segment}{any_sep}"
721
+ any_segments = f"(?:.+{any_sep})?"
722
+ any_last_segments = ".*"
723
+ results = []
724
+ parts = re.split(any_sep, pat)
725
+ last_part_idx = len(parts) - 1
726
+ for idx, part in enumerate(parts):
727
+ if part == "*":
728
+ results.append(one_segment if idx < last_part_idx else one_last_segment)
729
+ continue
730
+ if part == "**":
731
+ results.append(any_segments if idx < last_part_idx else any_last_segments)
732
+ continue
733
+ elif "**" in part:
734
+ raise ValueError(
735
+ "Invalid pattern: '**' can only be an entire path component"
736
+ )
737
+ if part:
738
+ results.extend(_translate(part, f"{not_sep}*", not_sep))
739
+ if idx < last_part_idx:
740
+ results.append(any_sep)
741
+ res = "".join(results)
742
+ return rf"(?s:{res})\Z"
{fsspec-2023.10.0.dist-info → fsspec-2024.2.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: fsspec
3
- Version: 2023.10.0
3
+ Version: 2024.2.0
4
4
  Summary: File-system specification
5
5
  Home-page: https://github.com/fsspec/filesystem_spec
6
6
  Maintainer: Martin Durant
@@ -71,7 +71,6 @@ Requires-Dist: panel ; extra == 'gui'
71
71
  Provides-Extra: hdfs
72
72
  Requires-Dist: pyarrow >=1 ; extra == 'hdfs'
73
73
  Provides-Extra: http
74
- Requires-Dist: requests ; extra == 'http'
75
74
  Requires-Dist: aiohttp !=4.0.0a0,!=4.0.0a1 ; extra == 'http'
76
75
  Provides-Extra: libarchive
77
76
  Requires-Dist: libarchive-c ; extra == 'libarchive'