fsspec 2023.10.0__py3-none-any.whl → 2023.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fsspec/_version.py CHANGED
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2023-10-21T13:35:51-0400",
11
+ "date": "2023-12-02T20:51:30-0500",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "e20f626b87b5bb87d223495a56aefd768272a7ca",
15
- "version": "2023.10.0"
14
+ "full-revisionid": "5cf9cd952c5d276835d3caef9c32fcf69d55b10c",
15
+ "version": "2023.12.0"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
fsspec/asyn.py CHANGED
@@ -15,7 +15,7 @@ from .callbacks import _DEFAULT_CALLBACK
15
15
  from .exceptions import FSTimeoutError
16
16
  from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep
17
17
  from .spec import AbstractBufferedFile, AbstractFileSystem
18
- from .utils import is_exception, other_paths
18
+ from .utils import glob_translate, is_exception, other_paths
19
19
 
20
20
  private = re.compile("_[^_]")
21
21
  iothread = [None] # dedicated fsspec IO thread
@@ -106,7 +106,7 @@ def sync(loop, func, *args, timeout=None, **kwargs):
106
106
 
107
107
 
108
108
  def sync_wrapper(func, obj=None):
109
- """Given a function, make so can be called in async or blocking contexts
109
+ """Given a function, make so can be called in blocking contexts
110
110
 
111
111
  Leave obj=None if defining within a class. Pass the instance if attaching
112
112
  as an attribute of the instance.
@@ -467,6 +467,16 @@ class AsyncFileSystem(AbstractFileSystem):
467
467
  on_error="return",
468
468
  **kwargs,
469
469
  ):
470
+ """Get the contents of byte ranges from one or more files
471
+
472
+ Parameters
473
+ ----------
474
+ paths: list
475
+ A list of filepaths on this filesystem
476
+ starts, ends: int or list
477
+ Bytes limits of the read. If using a single int, the same value will be
478
+ used to read all the specified files.
479
+ """
470
480
  # TODO: on_error
471
481
  if max_gap is not None:
472
482
  # use utils.merge_offset_ranges
@@ -476,7 +486,7 @@ class AsyncFileSystem(AbstractFileSystem):
476
486
  if not isinstance(starts, Iterable):
477
487
  starts = [starts] * len(paths)
478
488
  if not isinstance(ends, Iterable):
479
- ends = [starts] * len(paths)
489
+ ends = [ends] * len(paths)
480
490
  if len(starts) != len(paths) or len(ends) != len(paths):
481
491
  raise ValueError
482
492
  coros = [
@@ -662,9 +672,9 @@ class AsyncFileSystem(AbstractFileSystem):
662
672
  [self._size(p) for p in paths], batch_size=batch_size
663
673
  )
664
674
 
665
- async def _exists(self, path):
675
+ async def _exists(self, path, **kwargs):
666
676
  try:
667
- await self._info(path)
677
+ await self._info(path, **kwargs)
668
678
  return True
669
679
  except FileNotFoundError:
670
680
  return False
@@ -735,8 +745,12 @@ class AsyncFileSystem(AbstractFileSystem):
735
745
 
736
746
  import re
737
747
 
738
- ends = path.endswith("/")
748
+ seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
749
+ ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash
739
750
  path = self._strip_protocol(path)
751
+ append_slash_to_dirname = ends_with_sep or path.endswith(
752
+ tuple(sep + "**" for sep in seps)
753
+ )
740
754
  idx_star = path.find("*") if path.find("*") >= 0 else len(path)
741
755
  idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
742
756
  idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
@@ -746,11 +760,11 @@ class AsyncFileSystem(AbstractFileSystem):
746
760
  detail = kwargs.pop("detail", False)
747
761
 
748
762
  if not has_magic(path):
749
- if await self._exists(path):
763
+ if await self._exists(path, **kwargs):
750
764
  if not detail:
751
765
  return [path]
752
766
  else:
753
- return {path: await self._info(path)}
767
+ return {path: await self._info(path, **kwargs)}
754
768
  else:
755
769
  if not detail:
756
770
  return [] # glob of non-existent returns empty
@@ -775,46 +789,22 @@ class AsyncFileSystem(AbstractFileSystem):
775
789
  allpaths = await self._find(
776
790
  root, maxdepth=depth, withdirs=True, detail=True, **kwargs
777
791
  )
778
- # Escape characters special to python regex, leaving our supported
779
- # special characters in place.
780
- # See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
781
- # for shell globbing details.
782
- pattern = (
783
- "^"
784
- + (
785
- path.replace("\\", r"\\")
786
- .replace(".", r"\.")
787
- .replace("+", r"\+")
788
- .replace("//", "/")
789
- .replace("(", r"\(")
790
- .replace(")", r"\)")
791
- .replace("|", r"\|")
792
- .replace("^", r"\^")
793
- .replace("$", r"\$")
794
- .replace("{", r"\{")
795
- .replace("}", r"\}")
796
- .rstrip("/")
797
- .replace("?", ".")
798
- )
799
- + "$"
800
- )
801
- pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern)
802
- pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern)
803
- pattern = re.sub("[*]", "[^/]*", pattern)
804
- pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern)
805
- pattern = re.sub("=DOUBLE_STARS=", ".*", pattern)
792
+
793
+ pattern = glob_translate(path + ("/" if ends_with_sep else ""))
806
794
  pattern = re.compile(pattern)
795
+
807
796
  out = {
808
- p: allpaths[p]
809
- for p in sorted(allpaths)
810
- if pattern.match(p.replace("//", "/").rstrip("/"))
797
+ p: info
798
+ for p, info in sorted(allpaths.items())
799
+ if pattern.match(
800
+ (
801
+ p + "/"
802
+ if append_slash_to_dirname and info["type"] == "directory"
803
+ else p
804
+ )
805
+ )
811
806
  }
812
807
 
813
- # Return directories only when the glob end by a slash
814
- # This is needed for posix glob compliance
815
- if ends:
816
- out = {k: v for k, v in out.items() if v["type"] == "directory"}
817
-
818
808
  if detail:
819
809
  return out
820
810
  else:
fsspec/caching.py CHANGED
@@ -111,7 +111,7 @@ class MMapCache(BaseCache):
111
111
  fd.write(b"1")
112
112
  fd.flush()
113
113
  else:
114
- fd = open(self.location, "rb+")
114
+ fd = open(self.location, "r+b")
115
115
 
116
116
  return mmap.mmap(fd.fileno(), self.size)
117
117
 
fsspec/core.py CHANGED
@@ -1,8 +1,11 @@
1
+ from __future__ import annotations
2
+
1
3
  import io
2
4
  import logging
3
5
  import os
4
6
  import re
5
7
  from glob import has_magic
8
+ from pathlib import Path
6
9
 
7
10
  # for backwards compat, we export cache things from here too
8
11
  from .caching import ( # noqa: F401
@@ -290,7 +293,11 @@ def open_files(
290
293
  fs.auto_mkdir = auto_mkdir
291
294
  elif "r" not in mode and auto_mkdir:
292
295
  parents = {fs._parent(path) for path in paths}
293
- [fs.makedirs(parent, exist_ok=True) for parent in parents]
296
+ for parent in parents:
297
+ try:
298
+ fs.makedirs(parent, exist_ok=True)
299
+ except PermissionError:
300
+ pass
294
301
  return OpenFiles(
295
302
  [
296
303
  OpenFile(
@@ -465,7 +472,11 @@ def open(
465
472
  return out[0]
466
473
 
467
474
 
468
- def open_local(url, mode="rb", **storage_options):
475
+ def open_local(
476
+ url: str | list[str] | Path | list[Path],
477
+ mode: str = "rb",
478
+ **storage_options: dict,
479
+ ) -> str | list[str]:
469
480
  """Open file(s) which can be resolved to local
470
481
 
471
482
  For files which either are local, or get downloaded upon open
@@ -489,7 +500,7 @@ def open_local(url, mode="rb", **storage_options):
489
500
  )
490
501
  with of as files:
491
502
  paths = [f.name for f in files]
492
- if isinstance(url, str) and not has_magic(url):
503
+ if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path):
493
504
  return paths[0]
494
505
  return paths
495
506
 
@@ -510,6 +521,8 @@ def split_protocol(urlpath):
510
521
  if len(protocol) > 1:
511
522
  # excludes Windows paths
512
523
  return protocol, path
524
+ if ":" in urlpath and urlpath.find(":") > 1:
525
+ return urlpath.split(":", 1)
513
526
  return None, urlpath
514
527
 
515
528
 
fsspec/generic.py CHANGED
@@ -171,6 +171,10 @@ class GenericFileSystem(AsyncFileSystem):
171
171
  self.method = default_method
172
172
  super().__init__(**kwargs)
173
173
 
174
+ def _parent(self, path):
175
+ fs = _resolve_fs(path, self.method)
176
+ return fs.unstrip_protocol(fs._parent(path))
177
+
174
178
  def _strip_protocol(self, path):
175
179
  # normalization only
176
180
  fs = _resolve_fs(path, self.method)
fsspec/gui.py CHANGED
@@ -242,7 +242,7 @@ class FileSelector(SigSlot):
242
242
  else:
243
243
  self.init_protocol, url = "file", os.getcwd()
244
244
  self.init_url = url
245
- self.init_kwargs = kwargs or "{}"
245
+ self.init_kwargs = (kwargs if isinstance(kwargs, str) else str(kwargs)) or "{}"
246
246
  self.filters = filters
247
247
  self.ignore = [re.compile(i) for i in ignore or []]
248
248
  self._fs = None
@@ -17,6 +17,7 @@ from fsspec.exceptions import BlocksizeMismatchError
17
17
  from fsspec.implementations.cache_mapper import create_cache_mapper
18
18
  from fsspec.implementations.cache_metadata import CacheMetadata
19
19
  from fsspec.spec import AbstractBufferedFile
20
+ from fsspec.transaction import Transaction
20
21
  from fsspec.utils import infer_compression
21
22
 
22
23
  if TYPE_CHECKING:
@@ -25,6 +26,16 @@ if TYPE_CHECKING:
25
26
  logger = logging.getLogger("fsspec.cached")
26
27
 
27
28
 
29
+ class WriteCachedTransaction(Transaction):
30
+ def complete(self, commit=True):
31
+ rpaths = [f.path for f in self.files]
32
+ lpaths = [f.fn for f in self.files]
33
+ if commit:
34
+ self.fs.put(lpaths, rpaths)
35
+ # else remove?
36
+ self.fs._intrans = False
37
+
38
+
28
39
  class CachingFileSystem(AbstractFileSystem):
29
40
  """Locally caching filesystem, layer over any other FS
30
41
 
@@ -355,6 +366,9 @@ class CachingFileSystem(AbstractFileSystem):
355
366
  self.save_cache()
356
367
  return f
357
368
 
369
+ def _parent(self, path):
370
+ return self.fs._parent(path)
371
+
358
372
  def hash_name(self, path: str, *args: Any) -> str:
359
373
  # Kept for backward compatibility with downstream libraries.
360
374
  # Ignores extra arguments, previously same_name boolean.
@@ -390,6 +404,7 @@ class CachingFileSystem(AbstractFileSystem):
390
404
  "open",
391
405
  "cat",
392
406
  "cat_file",
407
+ "cat_ranges",
393
408
  "get",
394
409
  "read_block",
395
410
  "tail",
@@ -411,6 +426,10 @@ class CachingFileSystem(AbstractFileSystem):
411
426
  "__eq__",
412
427
  "to_json",
413
428
  "cache_size",
429
+ "pipe_file",
430
+ "pipe",
431
+ "start_transaction",
432
+ "end_transaction",
414
433
  ]:
415
434
  # all the methods defined in this class. Note `open` here, since
416
435
  # it calls `_open`, but is actually in superclass
@@ -419,7 +438,10 @@ class CachingFileSystem(AbstractFileSystem):
419
438
  )
420
439
  if item in ["__reduce_ex__"]:
421
440
  raise AttributeError
422
- if item in ["_cache"]:
441
+ if item in ["transaction"]:
442
+ # property
443
+ return type(self).transaction.__get__(self)
444
+ if item in ["_cache", "transaction_type"]:
423
445
  # class attributes
424
446
  return getattr(type(self), item)
425
447
  if item == "__class__":
@@ -508,7 +530,13 @@ class WholeFileCacheFileSystem(CachingFileSystem):
508
530
  self._mkcache()
509
531
  else:
510
532
  return [
511
- LocalTempFile(self.fs, path, mode=open_files.mode) for path in paths
533
+ LocalTempFile(
534
+ self.fs,
535
+ path,
536
+ mode=open_files.mode,
537
+ fn=os.path.join(self.storage[-1], self._mapper(path)),
538
+ )
539
+ for path in paths
512
540
  ]
513
541
 
514
542
  if self.compression:
@@ -621,7 +649,8 @@ class WholeFileCacheFileSystem(CachingFileSystem):
621
649
  def _open(self, path, mode="rb", **kwargs):
622
650
  path = self._strip_protocol(path)
623
651
  if "r" not in mode:
624
- return LocalTempFile(self, path, mode=mode)
652
+ fn = self._make_local_details(path)
653
+ return LocalTempFile(self, path, mode=mode, fn=fn)
625
654
  detail = self._check_file(path)
626
655
  if detail:
627
656
  detail, fn = detail
@@ -688,6 +717,7 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
688
717
 
689
718
  protocol = "simplecache"
690
719
  local_file = True
720
+ transaction_type = WriteCachedTransaction
691
721
 
692
722
  def __init__(self, **kwargs):
693
723
  kw = kwargs.copy()
@@ -712,16 +742,46 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
712
742
  def load_cache(self):
713
743
  pass
714
744
 
745
+ def pipe_file(self, path, value=None, **kwargs):
746
+ if self._intrans:
747
+ with self.open(path, "wb") as f:
748
+ f.write(value)
749
+ else:
750
+ super().pipe_file(path, value)
751
+
752
+ def pipe(self, path, value=None, **kwargs):
753
+ if isinstance(path, str):
754
+ self.pipe_file(self._strip_protocol(path), value, **kwargs)
755
+ elif isinstance(path, dict):
756
+ for k, v in path.items():
757
+ self.pipe_file(self._strip_protocol(k), v, **kwargs)
758
+ else:
759
+ raise ValueError("path must be str or dict")
760
+
761
+ def cat_ranges(
762
+ self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
763
+ ):
764
+ lpaths = [self._check_file(p) for p in paths]
765
+ rpaths = [p for l, p in zip(lpaths, paths) if l is False]
766
+ lpaths = [l for l, p in zip(lpaths, paths) if l is False]
767
+ self.fs.get(rpaths, lpaths)
768
+ return super().cat_ranges(
769
+ paths, starts, ends, max_gap=max_gap, on_error=on_error, **kwargs
770
+ )
771
+
715
772
  def _open(self, path, mode="rb", **kwargs):
716
773
  path = self._strip_protocol(path)
774
+ sha = self._mapper(path)
717
775
 
718
776
  if "r" not in mode:
719
- return LocalTempFile(self, path, mode=mode)
777
+ fn = os.path.join(self.storage[-1], sha)
778
+ return LocalTempFile(
779
+ self, path, mode=mode, autocommit=not self._intrans, fn=fn
780
+ )
720
781
  fn = self._check_file(path)
721
782
  if fn:
722
783
  return open(fn, mode)
723
784
 
724
- sha = self._mapper(path)
725
785
  fn = os.path.join(self.storage[-1], sha)
726
786
  logger.debug("Copying %s to local cache", path)
727
787
  kwargs["mode"] = mode
@@ -752,13 +812,9 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
752
812
  class LocalTempFile:
753
813
  """A temporary local file, which will be uploaded on commit"""
754
814
 
755
- def __init__(self, fs, path, fn=None, mode="wb", autocommit=True, seek=0):
756
- if fn:
757
- self.fn = fn
758
- self.fh = open(fn, mode)
759
- else:
760
- fd, self.fn = tempfile.mkstemp()
761
- self.fh = open(fd, mode)
815
+ def __init__(self, fs, path, fn, mode="wb", autocommit=True, seek=0):
816
+ self.fn = fn
817
+ self.fh = open(fn, mode)
762
818
  self.mode = mode
763
819
  if seek:
764
820
  self.fh.seek(seek)
@@ -768,10 +824,10 @@ class LocalTempFile:
768
824
  self.autocommit = autocommit
769
825
 
770
826
  def __reduce__(self):
771
- # always open in rb+ to allow continuing writing at a location
827
+ # always open in r+b to allow continuing writing at a location
772
828
  return (
773
829
  LocalTempFile,
774
- (self.fs, self.path, self.fn, "rb+", self.autocommit, self.tell()),
830
+ (self.fs, self.path, self.fn, "r+b", self.autocommit, self.tell()),
775
831
  )
776
832
 
777
833
  def __enter__(self):
@@ -0,0 +1,48 @@
1
+ import base64
2
+ import io
3
+ from urllib.parse import unquote
4
+
5
+ from fsspec import AbstractFileSystem
6
+
7
+
8
+ class DataFileSystem(AbstractFileSystem):
9
+ """A handy decoder for data-URLs
10
+
11
+ Example
12
+ -------
13
+ >>> with fsspec.open("data:,Hello%2C%20World%21") as f:
14
+ ... print(f.read())
15
+ b"Hello, World!"
16
+
17
+ """
18
+
19
+ protocol = "data"
20
+
21
+ def __init__(self, **kwargs):
22
+ """No parameters for this filesystem"""
23
+ super().__init__(**kwargs)
24
+
25
+ def cat_file(self, path, start=None, end=None, **kwargs):
26
+ pref, data = path.split(",", 1)
27
+ if pref.endswith("base64"):
28
+ return base64.b64decode(data)[start:end]
29
+ return unquote(data).encode()[start:end]
30
+
31
+ def info(self, path, **kwargs):
32
+ pref, name = path.split(",", 1)
33
+ data = self.cat_file(path)
34
+ mime = pref.split(":", 1)[1].split(";", 1)[0]
35
+ return {"name": name, "size": len(data), "type": "file", "mimetype": mime}
36
+
37
+ def _open(
38
+ self,
39
+ path,
40
+ mode="rb",
41
+ block_size=None,
42
+ autocommit=True,
43
+ cache_options=None,
44
+ **kwargs,
45
+ ):
46
+ if "r" not in mode:
47
+ raise ValueError("Read only filesystem")
48
+ return io.BytesIO(self.cat_file(path))
@@ -14,7 +14,13 @@ from fsspec.asyn import AbstractAsyncStreamedFile, AsyncFileSystem, sync, sync_w
14
14
  from fsspec.callbacks import _DEFAULT_CALLBACK
15
15
  from fsspec.exceptions import FSTimeoutError
16
16
  from fsspec.spec import AbstractBufferedFile
17
- from fsspec.utils import DEFAULT_BLOCK_SIZE, isfilelike, nullcontext, tokenize
17
+ from fsspec.utils import (
18
+ DEFAULT_BLOCK_SIZE,
19
+ glob_translate,
20
+ isfilelike,
21
+ nullcontext,
22
+ tokenize,
23
+ )
18
24
 
19
25
  from ..caching import AllBytes
20
26
 
@@ -441,8 +447,9 @@ class HTTPFileSystem(AsyncFileSystem):
441
447
  raise ValueError("maxdepth must be at least 1")
442
448
  import re
443
449
 
444
- ends = path.endswith("/")
450
+ ends_with_slash = path.endswith("/") # _strip_protocol strips trailing slash
445
451
  path = self._strip_protocol(path)
452
+ append_slash_to_dirname = ends_with_slash or path.endswith("/**")
446
453
  idx_star = path.find("*") if path.find("*") >= 0 else len(path)
447
454
  idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
448
455
 
@@ -451,11 +458,11 @@ class HTTPFileSystem(AsyncFileSystem):
451
458
  detail = kwargs.pop("detail", False)
452
459
 
453
460
  if not has_magic(path):
454
- if await self._exists(path):
461
+ if await self._exists(path, **kwargs):
455
462
  if not detail:
456
463
  return [path]
457
464
  else:
458
- return {path: await self._info(path)}
465
+ return {path: await self._info(path, **kwargs)}
459
466
  else:
460
467
  if not detail:
461
468
  return [] # glob of non-existent returns empty
@@ -480,45 +487,22 @@ class HTTPFileSystem(AsyncFileSystem):
480
487
  allpaths = await self._find(
481
488
  root, maxdepth=depth, withdirs=True, detail=True, **kwargs
482
489
  )
483
- # Escape characters special to python regex, leaving our supported
484
- # special characters in place.
485
- # See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
486
- # for shell globbing details.
487
- pattern = (
488
- "^"
489
- + (
490
- path.replace("\\", r"\\")
491
- .replace(".", r"\.")
492
- .replace("+", r"\+")
493
- .replace("//", "/")
494
- .replace("(", r"\(")
495
- .replace(")", r"\)")
496
- .replace("|", r"\|")
497
- .replace("^", r"\^")
498
- .replace("$", r"\$")
499
- .replace("{", r"\{")
500
- .replace("}", r"\}")
501
- .rstrip("/")
502
- )
503
- + "$"
504
- )
505
- pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern)
506
- pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern)
507
- pattern = re.sub("[*]", "[^/]*", pattern)
508
- pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern)
509
- pattern = re.sub("=DOUBLE_STARS=", ".*", pattern)
490
+
491
+ pattern = glob_translate(path + ("/" if ends_with_slash else ""))
510
492
  pattern = re.compile(pattern)
493
+
511
494
  out = {
512
- p: allpaths[p]
513
- for p in sorted(allpaths)
514
- if pattern.match(p.replace("//", "/").rstrip("/"))
495
+ p: info
496
+ for p, info in sorted(allpaths.items())
497
+ if pattern.match(
498
+ (
499
+ p + "/"
500
+ if append_slash_to_dirname and info["type"] == "directory"
501
+ else p
502
+ )
503
+ )
515
504
  }
516
505
 
517
- # Return directories only when the glob end by a slash
518
- # This is needed for posix glob compliance
519
- if ends:
520
- out = {k: v for k, v in out.items() if v["type"] == "directory"}
521
-
522
506
  if detail:
523
507
  return out
524
508
  else:
@@ -818,7 +802,7 @@ async def get_range(session, url, start, end, file=None, **kwargs):
818
802
  async with r:
819
803
  out = await r.read()
820
804
  if file:
821
- with open(file, "rb+") as f:
805
+ with open(file, "r+b") as f:
822
806
  f.seek(start)
823
807
  f.write(out)
824
808
  else:
@@ -855,7 +839,10 @@ async def _file_info(url, session, size_policy="head", **kwargs):
855
839
  if "Content-Length" in r.headers:
856
840
  # Some servers may choose to ignore Accept-Encoding and return
857
841
  # compressed content, in which case the returned size is unreliable.
858
- if r.headers.get("Content-Encoding", "identity") == "identity":
842
+ if "Content-Encoding" not in r.headers or r.headers["Content-Encoding"] in [
843
+ "identity",
844
+ "",
845
+ ]:
859
846
  info["size"] = int(r.headers["Content-Length"])
860
847
  elif "Content-Range" in r.headers:
861
848
  info["size"] = int(r.headers["Content-Range"].split("/")[1])
@@ -175,7 +175,7 @@ class MemoryFileSystem(AbstractFileSystem):
175
175
  parent = self._parent(parent)
176
176
  if self.isfile(parent):
177
177
  raise FileExistsError(parent)
178
- if mode in ["rb", "ab", "rb+"]:
178
+ if mode in ["rb", "ab", "r+b"]:
179
179
  if path in self.store:
180
180
  f = self.store[path]
181
181
  if mode == "ab":
@@ -150,8 +150,29 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
150
150
  self.open_refs = open_refs
151
151
 
152
152
  @staticmethod
153
- def create(record_size, root, fs, **kwargs):
153
+ def create(root, storage_options=None, fs=None, record_size=10000, **kwargs):
154
+ """Make empty parquet reference set
155
+
156
+ Parameters
157
+ ----------
158
+ root: str
159
+ Directory to contain the output; will be created
160
+ storage_options: dict | None
161
+ For making the filesystem to use for writing if fs is None
162
+ fs: FileSystem | None
163
+ Filesystem for writing
164
+ record_size: int
165
+ Number of references per parquet file
166
+ kwargs: passed to __init__
167
+
168
+ Returns
169
+ -------
170
+ LazyReferenceMapper instance
171
+ """
154
172
  met = {"metadata": {}, "record_size": record_size}
173
+ if fs is None:
174
+ fs, root = fsspec.core.url_to_fs(root, **(storage_options or {}))
175
+ fs.makedirs(root, exist_ok=True)
155
176
  fs.pipe("/".join([root, ".zmetadata"]), json.dumps(met).encode())
156
177
  return LazyReferenceMapper(root, fs, **kwargs)
157
178
 
@@ -292,7 +313,7 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
292
313
  def _generate_record(self, field, record):
293
314
  """The references for a given parquet file of a given field"""
294
315
  refs = self.open_refs(field, record)
295
- it = iter(zip(refs.values()))
316
+ it = iter(zip(*refs.values()))
296
317
  if len(refs) == 3:
297
318
  # All urls
298
319
  return (list(t) for t in it)
@@ -603,7 +624,7 @@ class ReferenceFileSystem(AsyncFileSystem):
603
624
  **(ref_storage_args or target_options or {}), protocol=target_protocol
604
625
  )
605
626
  ref_fs, fo2 = fsspec.core.url_to_fs(fo, **dic)
606
- if ref_fs.isfile(fo):
627
+ if ref_fs.isfile(fo2):
607
628
  # text JSON
608
629
  with fsspec.open(fo, "rb", **dic) as f:
609
630
  logger.info("Read reference from URL %s", fo)
@@ -650,6 +671,7 @@ class ReferenceFileSystem(AsyncFileSystem):
650
671
  self.fss[protocol] = fs
651
672
  if remote_protocol is None:
652
673
  # get single protocol from references
674
+ # TODO: warning here, since this can be very expensive?
653
675
  for ref in self.references.values():
654
676
  if callable(ref):
655
677
  ref = ref()
@@ -772,24 +794,27 @@ class ReferenceFileSystem(AsyncFileSystem):
772
794
  raise NotImplementedError
773
795
  if isinstance(path, list) and (recursive or any("*" in p for p in path)):
774
796
  raise NotImplementedError
797
+ # TODO: if references is lazy, pre-fetch all paths in batch before access
775
798
  proto_dict = _protocol_groups(path, self.references)
776
799
  out = {}
777
800
  for proto, paths in proto_dict.items():
778
801
  fs = self.fss[proto]
779
- urls, starts, ends = [], [], []
802
+ urls, starts, ends, valid_paths = [], [], [], []
780
803
  for p in paths:
781
804
  # find references or label not-found. Early exit if any not
782
805
  # found and on_error is "raise"
783
806
  try:
784
807
  u, s, e = self._cat_common(p)
785
- urls.append(u)
786
- starts.append(s)
787
- ends.append(e)
788
808
  except FileNotFoundError as err:
789
809
  if on_error == "raise":
790
810
  raise
791
811
  if on_error != "omit":
792
812
  out[p] = err
813
+ else:
814
+ urls.append(u)
815
+ starts.append(s)
816
+ ends.append(e)
817
+ valid_paths.append(p)
793
818
 
794
819
  # process references into form for merging
795
820
  urls2 = []
@@ -797,7 +822,7 @@ class ReferenceFileSystem(AsyncFileSystem):
797
822
  ends2 = []
798
823
  paths2 = []
799
824
  whole_files = set()
800
- for u, s, e, p in zip(urls, starts, ends, paths):
825
+ for u, s, e, p in zip(urls, starts, ends, valid_paths):
801
826
  if isinstance(u, bytes):
802
827
  # data
803
828
  out[p] = u
@@ -809,7 +834,7 @@ class ReferenceFileSystem(AsyncFileSystem):
809
834
  starts2.append(s)
810
835
  ends2.append(e)
811
836
  paths2.append(p)
812
- for u, s, e, p in zip(urls, starts, ends, paths):
837
+ for u, s, e, p in zip(urls, starts, ends, valid_paths):
813
838
  # second run to account for files that are to be loaded whole
814
839
  if s is not None and u not in whole_files:
815
840
  urls2.append(u)
@@ -829,7 +854,7 @@ class ReferenceFileSystem(AsyncFileSystem):
829
854
  bytes_out = fs.cat_ranges(new_paths, new_starts, new_ends)
830
855
 
831
856
  # unbundle from merged bytes - simple approach
832
- for u, s, e, p in zip(urls, starts, ends, paths):
857
+ for u, s, e, p in zip(urls, starts, ends, valid_paths):
833
858
  if p in out:
834
859
  continue # was bytes, already handled
835
860
  for np, ns, ne, b in zip(new_paths, new_starts, new_ends, bytes_out):
@@ -963,16 +988,24 @@ class ReferenceFileSystem(AsyncFileSystem):
963
988
  elif len(part) == 1:
964
989
  size = None
965
990
  else:
966
- _, start, size = part
991
+ _, _, size = part
967
992
  par = path.rsplit("/", 1)[0] if "/" in path else ""
968
993
  par0 = par
994
+ subdirs = [par0]
969
995
  while par0 and par0 not in self.dircache:
970
- # build parent directories
971
- self.dircache[par0] = []
972
- self.dircache.setdefault(
973
- par0.rsplit("/", 1)[0] if "/" in par0 else "", []
974
- ).append({"name": par0, "type": "directory", "size": 0})
996
+ # collect parent directories
975
997
  par0 = self._parent(par0)
998
+ subdirs.append(par0)
999
+
1000
+ subdirs = subdirs[::-1]
1001
+ for parent, child in zip(subdirs, subdirs[1:]):
1002
+ # register newly discovered directories
1003
+ assert child not in self.dircache
1004
+ assert parent in self.dircache
1005
+ self.dircache[parent].append(
1006
+ {"name": child, "type": "directory", "size": 0}
1007
+ )
1008
+ self.dircache[child] = []
976
1009
 
977
1010
  self.dircache[par].append({"name": path, "type": "file", "size": size})
978
1011
 
@@ -1,4 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
2
  This module contains SMBFileSystem class responsible for handling access to
4
3
  Windows Samba network shares by using package smbprotocol
@@ -21,7 +21,7 @@ class WebHDFS(AbstractFileSystem):
21
21
  """
22
22
  Interface to HDFS over HTTP using the WebHDFS API. Supports also HttpFS gateways.
23
23
 
24
- Three auth mechanisms are supported:
24
+ Four auth mechanisms are supported:
25
25
 
26
26
  insecure: no auth is done, and the user is assumed to be whoever they
27
27
  say they are (parameter ``user``), or a predefined value such as
@@ -34,6 +34,8 @@ class WebHDFS(AbstractFileSystem):
34
34
  service. Indeed, this client can also generate such tokens when
35
35
  not insecure. Note that tokens expire, but can be renewed (by a
36
36
  previously specified user) and may allow for proxying.
37
+ basic-auth: used when both parameter ``user`` and parameter ``password``
38
+ are provided.
37
39
 
38
40
  """
39
41
 
@@ -47,6 +49,7 @@ class WebHDFS(AbstractFileSystem):
47
49
  kerberos=False,
48
50
  token=None,
49
51
  user=None,
52
+ password=None,
50
53
  proxy_to=None,
51
54
  kerb_kwargs=None,
52
55
  data_proxy=None,
@@ -68,6 +71,9 @@ class WebHDFS(AbstractFileSystem):
68
71
  given
69
72
  user: str or None
70
73
  If given, assert the user name to connect with
74
+ password: str or None
75
+ If given, assert the password to use for basic auth. If password
76
+ is provided, user must be provided also
71
77
  proxy_to: str or None
72
78
  If given, the user has the authority to proxy, and this value is
73
79
 the user in whose name actions are taken
@@ -102,8 +108,19 @@ class WebHDFS(AbstractFileSystem):
102
108
  " token"
103
109
  )
104
110
  self.pars["delegation"] = token
105
- if user is not None:
106
- self.pars["user.name"] = user
111
+ self.user = user
112
+ self.password = password
113
+
114
+ if password is not None:
115
+ if user is None:
116
+ raise ValueError(
117
+ "If passing a password, the user must also be"
118
+ "set in order to set up the basic-auth"
119
+ )
120
+ else:
121
+ if user is not None:
122
+ self.pars["user.name"] = user
123
+
107
124
  if proxy_to is not None:
108
125
  self.pars["doas"] = proxy_to
109
126
  if kerberos and user is not None:
@@ -126,8 +143,13 @@ class WebHDFS(AbstractFileSystem):
126
143
 
127
144
  self.session.auth = HTTPKerberosAuth(**self.kerb_kwargs)
128
145
 
146
+ if self.user is not None and self.password is not None:
147
+ from requests.auth import HTTPBasicAuth
148
+
149
+ self.session.auth = HTTPBasicAuth(self.user, self.password)
150
+
129
151
  def _call(self, op, method="get", path=None, data=None, redirect=True, **kwargs):
130
- url = self.url + quote(path or "")
152
+ url = self._apply_proxy(self.url + quote(path or "", safe="/="))
131
153
  args = kwargs.copy()
132
154
  args.update(self.pars)
133
155
  args["op"] = op.upper()
fsspec/registry.py CHANGED
@@ -57,9 +57,10 @@ def register_implementation(name, cls, clobber=False, errtxt=None):
57
57
  _registry[name] = cls
58
58
 
59
59
 
60
- # protocols mapped to the class which implements them. This dict can
60
+ # protocols mapped to the class which implements them. This dict can be
61
61
  # updated with register_implementation
62
62
  known_implementations = {
63
+ "data": {"class": "fsspec.implementations.data.DataFileSystem"},
63
64
  "file": {"class": "fsspec.implementations.local.LocalFileSystem"},
64
65
  "local": {"class": "fsspec.implementations.local.LocalFileSystem"},
65
66
  "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"},
fsspec/spec.py CHANGED
@@ -17,6 +17,7 @@ from .dircache import DirCache
17
17
  from .transaction import Transaction
18
18
  from .utils import (
19
19
  _unstrip_protocol,
20
+ glob_translate,
20
21
  isfilelike,
21
22
  other_paths,
22
23
  read_block,
@@ -109,6 +110,7 @@ class AbstractFileSystem(metaclass=_Cached):
109
110
  async_impl = False
110
111
  mirror_sync_methods = False
111
112
  root_marker = "" # For some FSs, may require leading '/' or other character
113
+ transaction_type = Transaction
112
114
 
113
115
  #: Extra *class attributes* that should be considered when hashing.
114
116
  _extra_tokenize_attributes = ()
@@ -235,20 +237,20 @@ class AbstractFileSystem(metaclass=_Cached):
235
237
  for the normal and exception cases.
236
238
  """
237
239
  if self._transaction is None:
238
- self._transaction = Transaction(self)
240
+ self._transaction = self.transaction_type(self)
239
241
  return self._transaction
240
242
 
241
243
  def start_transaction(self):
242
244
  """Begin write transaction for deferring files, non-context version"""
243
245
  self._intrans = True
244
- self._transaction = Transaction(self)
246
+ self._transaction = self.transaction_type(self)
245
247
  return self.transaction
246
248
 
247
249
  def end_transaction(self):
248
250
  """Finish write transaction, non-context version"""
249
251
  self.transaction.complete()
250
252
  self._transaction = None
251
- # The invalid cache must be cleared after the transcation is completed.
253
+ # The invalid cache must be cleared after the transaction is completed.
252
254
  for path in self._invalidated_caches_in_transaction:
253
255
  self.invalidate_cache(path)
254
256
  self._invalidated_caches_in_transaction.clear()
@@ -551,10 +553,6 @@ class AbstractFileSystem(metaclass=_Cached):
551
553
 
552
554
  The `maxdepth` option is applied on the first `**` found in the path.
553
555
 
554
- Search path names that contain embedded characters special to this
555
- implementation of glob may not produce expected results;
556
- e.g., ``foo/bar/*starredfilename*``.
557
-
558
556
  kwargs are passed to ``ls``.
559
557
  """
560
558
  if maxdepth is not None and maxdepth < 1:
@@ -562,8 +560,12 @@ class AbstractFileSystem(metaclass=_Cached):
562
560
 
563
561
  import re
564
562
 
565
- ends = path.endswith("/")
563
+ seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
564
+ ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash
566
565
  path = self._strip_protocol(path)
566
+ append_slash_to_dirname = ends_with_sep or path.endswith(
567
+ tuple(sep + "**" for sep in seps)
568
+ )
567
569
  idx_star = path.find("*") if path.find("*") >= 0 else len(path)
568
570
  idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
569
571
  idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
@@ -573,11 +575,11 @@ class AbstractFileSystem(metaclass=_Cached):
573
575
  detail = kwargs.pop("detail", False)
574
576
 
575
577
  if not has_magic(path):
576
- if self.exists(path):
578
+ if self.exists(path, **kwargs):
577
579
  if not detail:
578
580
  return [path]
579
581
  else:
580
- return {path: self.info(path)}
582
+ return {path: self.info(path, **kwargs)}
581
583
  else:
582
584
  if not detail:
583
585
  return [] # glob of non-existent returns empty
@@ -600,47 +602,22 @@ class AbstractFileSystem(metaclass=_Cached):
600
602
  depth = None
601
603
 
602
604
  allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs)
603
- # Escape characters special to python regex, leaving our supported
604
- # special characters in place.
605
- # See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
606
- # for shell globbing details.
607
- pattern = (
608
- "^"
609
- + (
610
- path.replace("\\", r"\\")
611
- .replace(".", r"\.")
612
- .replace("+", r"\+")
613
- .replace("//", "/")
614
- .replace("(", r"\(")
615
- .replace(")", r"\)")
616
- .replace("|", r"\|")
617
- .replace("^", r"\^")
618
- .replace("$", r"\$")
619
- .replace("{", r"\{")
620
- .replace("}", r"\}")
621
- .rstrip("/")
622
- .replace("?", ".")
623
- )
624
- + "$"
625
- )
626
- pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern)
627
- pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern)
628
- pattern = re.sub("[*]", "[^/]*", pattern)
629
- pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern)
630
- pattern = re.sub("=DOUBLE_STARS=", ".*", pattern)
605
+
606
+ pattern = glob_translate(path + ("/" if ends_with_sep else ""))
631
607
  pattern = re.compile(pattern)
632
608
 
633
609
  out = {
634
- p: allpaths[p]
635
- for p in sorted(allpaths)
636
- if pattern.match(p.replace("//", "/").rstrip("/"))
610
+ p: info
611
+ for p, info in sorted(allpaths.items())
612
+ if pattern.match(
613
+ (
614
+ p + "/"
615
+ if append_slash_to_dirname and info["type"] == "directory"
616
+ else p
617
+ )
618
+ )
637
619
  }
638
620
 
639
- # Return directories only when the glob end by a slash
640
- # This is needed for posix glob compliance
641
- if ends:
642
- out = {k: v for k, v in out.items() if v["type"] == "directory"}
643
-
644
621
  if detail:
645
622
  return out
646
623
  else:
@@ -828,6 +805,16 @@ class AbstractFileSystem(metaclass=_Cached):
828
805
  def cat_ranges(
829
806
  self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
830
807
  ):
808
+ """Get the contents of byte ranges from one or more files
809
+
810
+ Parameters
811
+ ----------
812
+ paths: list
813
+ A list of of filepaths on this filesystems
814
+ starts, ends: int or list
815
+ Bytes limits of the read. If using a single int, the same value will be
816
+ used to read all the specified files.
817
+ """
831
818
  if max_gap is not None:
832
819
  raise NotImplementedError
833
820
  if not isinstance(paths, list):
@@ -835,7 +822,7 @@ class AbstractFileSystem(metaclass=_Cached):
835
822
  if not isinstance(starts, list):
836
823
  starts = [starts] * len(paths)
837
824
  if not isinstance(ends, list):
838
- ends = [starts] * len(paths)
825
+ ends = [ends] * len(paths)
839
826
  if len(starts) != len(paths) or len(ends) != len(paths):
840
827
  raise ValueError
841
828
  out = []
@@ -901,7 +888,8 @@ class AbstractFileSystem(metaclass=_Cached):
901
888
  os.makedirs(lpath, exist_ok=True)
902
889
  return None
903
890
 
904
- LocalFileSystem(auto_mkdir=True).makedirs(self._parent(lpath), exist_ok=True)
891
+ fs = LocalFileSystem(auto_mkdir=True)
892
+ fs.makedirs(fs._parent(lpath), exist_ok=True)
905
893
 
906
894
  with self.open(rpath, "rb", **kwargs) as f1:
907
895
  if outfile is None:
@@ -107,9 +107,9 @@ GLOB_EDGE_CASES_TESTS = {
107
107
  "subdir1/subfile2",
108
108
  ],
109
109
  ),
110
- ("**1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
110
+ ("**/*1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
111
111
  (
112
- "**1",
112
+ "**/*1",
113
113
  True,
114
114
  None,
115
115
  [
@@ -120,14 +120,14 @@ GLOB_EDGE_CASES_TESTS = {
120
120
  "subdir1/nesteddir/nestedfile",
121
121
  ],
122
122
  ),
123
- ("**1", True, 1, ["file1"]),
123
+ ("**/*1", True, 1, ["file1"]),
124
124
  (
125
- "**1",
125
+ "**/*1",
126
126
  True,
127
127
  2,
128
128
  ["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"],
129
129
  ),
130
- ("**1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
130
+ ("**/*1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]),
131
131
  ("**/subdir0", False, None, []),
132
132
  ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]),
133
133
  ("**/subdir0/nested*", False, 2, []),
fsspec/transaction.py CHANGED
@@ -1,3 +1,6 @@
1
+ from collections import deque
2
+
3
+
1
4
  class Transaction:
2
5
  """Filesystem transaction write context
3
6
 
@@ -13,10 +16,11 @@ class Transaction:
13
16
  fs: FileSystem instance
14
17
  """
15
18
  self.fs = fs
16
- self.files = []
19
+ self.files = deque()
17
20
 
18
21
  def __enter__(self):
19
22
  self.start()
23
+ return self
20
24
 
21
25
  def __exit__(self, exc_type, exc_val, exc_tb):
22
26
  """End transaction and commit, if exit is not due to exception"""
@@ -27,17 +31,17 @@ class Transaction:
27
31
 
28
32
  def start(self):
29
33
  """Start a transaction on this FileSystem"""
30
- self.files = [] # clean up after previous failed completions
34
+ self.files = deque() # clean up after previous failed completions
31
35
  self.fs._intrans = True
32
36
 
33
37
  def complete(self, commit=True):
34
38
  """Finish transaction: commit or discard all deferred files"""
35
- for f in self.files:
39
+ while self.files:
40
+ f = self.files.popleft()
36
41
  if commit:
37
42
  f.commit()
38
43
  else:
39
44
  f.discard()
40
- self.files = []
41
45
  self.fs._intrans = False
42
46
 
43
47
 
fsspec/utils.py CHANGED
@@ -320,7 +320,7 @@ def tokenize(*args: Any, **kwargs: Any) -> str:
320
320
  h = md5(str(args).encode())
321
321
  except ValueError:
322
322
  # FIPS systems: https://github.com/fsspec/filesystem_spec/issues/380
323
- h = md5(str(args).encode(), usedforsecurity=False) # type: ignore[call-arg]
323
+ h = md5(str(args).encode(), usedforsecurity=False)
324
324
  return h.hexdigest()
325
325
 
326
326
 
@@ -436,6 +436,7 @@ def isfilelike(f: Any) -> TypeGuard[IO[bytes]]:
436
436
 
437
437
 
438
438
  def get_protocol(url: str) -> str:
439
+ url = stringify_path(url)
439
440
  parts = re.split(r"(\:\:|\://)", url, 1)
440
441
  if len(parts) > 1:
441
442
  return parts[0]
@@ -627,3 +628,115 @@ def atomic_write(path: str, mode: str = "wb"):
627
628
  raise
628
629
  else:
629
630
  os.replace(fn, path)
631
+
632
+
633
+ def _translate(pat, STAR, QUESTION_MARK):
634
+ # Copied from: https://github.com/python/cpython/pull/106703.
635
+ res: list[str] = []
636
+ add = res.append
637
+ i, n = 0, len(pat)
638
+ while i < n:
639
+ c = pat[i]
640
+ i = i + 1
641
+ if c == "*":
642
+ # compress consecutive `*` into one
643
+ if (not res) or res[-1] is not STAR:
644
+ add(STAR)
645
+ elif c == "?":
646
+ add(QUESTION_MARK)
647
+ elif c == "[":
648
+ j = i
649
+ if j < n and pat[j] == "!":
650
+ j = j + 1
651
+ if j < n and pat[j] == "]":
652
+ j = j + 1
653
+ while j < n and pat[j] != "]":
654
+ j = j + 1
655
+ if j >= n:
656
+ add("\\[")
657
+ else:
658
+ stuff = pat[i:j]
659
+ if "-" not in stuff:
660
+ stuff = stuff.replace("\\", r"\\")
661
+ else:
662
+ chunks = []
663
+ k = i + 2 if pat[i] == "!" else i + 1
664
+ while True:
665
+ k = pat.find("-", k, j)
666
+ if k < 0:
667
+ break
668
+ chunks.append(pat[i:k])
669
+ i = k + 1
670
+ k = k + 3
671
+ chunk = pat[i:j]
672
+ if chunk:
673
+ chunks.append(chunk)
674
+ else:
675
+ chunks[-1] += "-"
676
+ # Remove empty ranges -- invalid in RE.
677
+ for k in range(len(chunks) - 1, 0, -1):
678
+ if chunks[k - 1][-1] > chunks[k][0]:
679
+ chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:]
680
+ del chunks[k]
681
+ # Escape backslashes and hyphens for set difference (--).
682
+ # Hyphens that create ranges shouldn't be escaped.
683
+ stuff = "-".join(
684
+ s.replace("\\", r"\\").replace("-", r"\-") for s in chunks
685
+ )
686
+ # Escape set operations (&&, ~~ and ||).
687
+ stuff = re.sub(r"([&~|])", r"\\\1", stuff)
688
+ i = j + 1
689
+ if not stuff:
690
+ # Empty range: never match.
691
+ add("(?!)")
692
+ elif stuff == "!":
693
+ # Negated empty range: match any character.
694
+ add(".")
695
+ else:
696
+ if stuff[0] == "!":
697
+ stuff = "^" + stuff[1:]
698
+ elif stuff[0] in ("^", "["):
699
+ stuff = "\\" + stuff
700
+ add(f"[{stuff}]")
701
+ else:
702
+ add(re.escape(c))
703
+ assert i == n
704
+ return res
705
+
706
+
707
+ def glob_translate(pat):
708
+ # Copied from: https://github.com/python/cpython/pull/106703.
709
+ # The keyword parameters' values are fixed to:
710
+ # recursive=True, include_hidden=True, seps=None
711
+ """Translate a pathname with shell wildcards to a regular expression."""
712
+ if os.path.altsep:
713
+ seps = os.path.sep + os.path.altsep
714
+ else:
715
+ seps = os.path.sep
716
+ escaped_seps = "".join(map(re.escape, seps))
717
+ any_sep = f"[{escaped_seps}]" if len(seps) > 1 else escaped_seps
718
+ not_sep = f"[^{escaped_seps}]"
719
+ one_last_segment = f"{not_sep}+"
720
+ one_segment = f"{one_last_segment}{any_sep}"
721
+ any_segments = f"(?:.+{any_sep})?"
722
+ any_last_segments = ".*"
723
+ results = []
724
+ parts = re.split(any_sep, pat)
725
+ last_part_idx = len(parts) - 1
726
+ for idx, part in enumerate(parts):
727
+ if part == "*":
728
+ results.append(one_segment if idx < last_part_idx else one_last_segment)
729
+ continue
730
+ if part == "**":
731
+ results.append(any_segments if idx < last_part_idx else any_last_segments)
732
+ continue
733
+ elif "**" in part:
734
+ raise ValueError(
735
+ "Invalid pattern: '**' can only be an entire path component"
736
+ )
737
+ if part:
738
+ results.extend(_translate(part, f"{not_sep}*", not_sep))
739
+ if idx < last_part_idx:
740
+ results.append(any_sep)
741
+ res = "".join(results)
742
+ return rf"(?s:{res})\Z"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: fsspec
3
- Version: 2023.10.0
3
+ Version: 2023.12.0
4
4
  Summary: File-system specification
5
5
  Home-page: https://github.com/fsspec/filesystem_spec
6
6
  Maintainer: Martin Durant
@@ -1,53 +1,54 @@
1
1
  fsspec/__init__.py,sha256=2kT62GfFK-AjgS-LgwSsCo_VA2IePvsyv8Ash5oiaFA,1982
2
- fsspec/_version.py,sha256=tXkjc9ntL7xecBDIEGncLHqDTApc-4eHEprCgBTMc2Y,501
2
+ fsspec/_version.py,sha256=jAwgcB8zkKMbpk-J9sbW492ytNhl4CO0dj1nip8cBPg,501
3
3
  fsspec/archive.py,sha256=42f8FEAyP0LIvAhdzS1djyxEyCvlWlnzPo7igWFflYM,2400
4
- fsspec/asyn.py,sha256=T0lbd5U7mK8HVTq4uhbRaeOfs8S2pGKp8PLMGTGh8tY,36587
5
- fsspec/caching.py,sha256=jaHAKBUczmynooHbDQZHrmeyOz-groI_GlfRJG7fI9Q,28781
4
+ fsspec/asyn.py,sha256=wx6vr5eBJYdW7a2cyv-LkfWu5dCDCcAjcDKjp3ylgR0,36154
5
+ fsspec/caching.py,sha256=N45pzJdD4w5FOX_sxGvHWirggPNB66JTGP1HH6fpSck,28781
6
6
  fsspec/callbacks.py,sha256=qmD1v-WWxWmTmcUkEadq-_F_n3OGp9JYarjupUq_j3o,6358
7
7
  fsspec/compression.py,sha256=Zrbbb_m2SCF427BMJRYbDKMuSZIIV2YqteoS7AdR8Sc,4867
8
8
  fsspec/config.py,sha256=LF4Zmu1vhJW7Je9Q-cwkRc3xP7Rhyy7Xnwj26Z6sv2g,4279
9
9
  fsspec/conftest.py,sha256=fVfx-NLrH_OZS1TIpYNoPzM7efEcMoL62reHOdYeFCA,1245
10
- fsspec/core.py,sha256=iCEdBN9cL3LV9RbaCUjV8P4QOR_kCtdD0tcu5vNBSqg,22029
10
+ fsspec/core.py,sha256=9oaIXN4PbbCRo4RZ-r7ZZ6LLm4vMyDwqv-x53kBfOmQ,22376
11
11
  fsspec/dircache.py,sha256=YzogWJrhEastHU7vWz-cJiJ7sdtLXFXhEpInGKd4EcM,2717
12
12
  fsspec/exceptions.py,sha256=s5eA2wIwzj-aeV0i_KDXsBaIhJJRKzmMGUGwuBHTnS4,348
13
13
  fsspec/fuse.py,sha256=66amOa6wdIbS0DMhhfAPUoOB37HPorfXD1izV0prmTY,10145
14
- fsspec/generic.py,sha256=GpuEKtlU--YLNrCEzq3wb9EnfG2rsEGfAzrHyL_0Cfo,13246
15
- fsspec/gui.py,sha256=w0OnzIkzCu0VofSP6t0ajIBl-GkkzI6WbeidxEg9tJg,13867
14
+ fsspec/generic.py,sha256=StTDGXR-r9DJ7YEformhxfoexfewt_JkEUWN0oFRfbA,13373
15
+ fsspec/gui.py,sha256=BEVFplRsQyakNeCWU-vyZBD-16x_flEe0XiDxXparEU,13913
16
16
  fsspec/mapping.py,sha256=WFEXRWxujQwfzzkRP5tpdIE0265okAtlP97qFZGvV1k,8165
17
17
  fsspec/parquet.py,sha256=i4H3EU3K1Q6jp8sqjFji6a6gKnlOEZufaa7DRNE5X-4,19516
18
- fsspec/registry.py,sha256=mLdGDQYC3LqphVdBKUL4Q-QS_9wV--pEDjBZvUdyeXw,11073
19
- fsspec/spec.py,sha256=9HR1IWe8jWhtq8w9IYBW-waXMaCrQp3Rxe90joixGBU,66712
20
- fsspec/transaction.py,sha256=gNcyHtN1mzdazsGyhDadWsR_E9WyCr3S9OypBQGpp2s,2179
21
- fsspec/utils.py,sha256=1sedrZZfN5EE1_CggJSQBiZYGvwaXgRy2m6RubY_bIw,18904
18
+ fsspec/registry.py,sha256=-dl7sh2tsfhMA2uxz5KQDsPFehQTgMJIbVjNq6QLoKU,11145
19
+ fsspec/spec.py,sha256=kfZpvKoh-fftKG6cOkOi2k0PJJwRqV4ZX_NElCBdcB8,66154
20
+ fsspec/transaction.py,sha256=jeexB-H6Aw_gN6Z7hoKKe6v8zizITq39-gyTgpipIKE,2251
21
+ fsspec/utils.py,sha256=_VX_0VwDtoAFSjMYrxvJvnPNX9FMoHO5BlFHXJ0bHFI,23053
22
22
  fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
23
  fsspec/implementations/arrow.py,sha256=1d-c5KceQJxm8QXML8fFXHvQx0wstG-tNJNsrgMX_CI,8240
24
24
  fsspec/implementations/cache_mapper.py,sha256=nE_sY3vw-jJbeBcAP6NGtacP3jHW_7EcG3yUSf0A-4Y,2502
25
25
  fsspec/implementations/cache_metadata.py,sha256=ZvyA7Y3KK-5Ct4E5pELzD6mH_5T03XqaKVT96qYDADU,8576
26
- fsspec/implementations/cached.py,sha256=DTheLWDihvaq4WI4ZuJXP7kjnof3XhfiuOEGidyZGFE,28271
26
+ fsspec/implementations/cached.py,sha256=jCQSAIiO7M8OOmwG4cCYn4LGvMVCbldC9j7GeonwoEc,30238
27
27
  fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
28
+ fsspec/implementations/data.py,sha256=Oti0dKzyeadnVIedo3s8CADoh9bNM-96_6viTEYr4lo,1245
28
29
  fsspec/implementations/dbfs.py,sha256=0ndCE2OQqrWv6Y8ETufxOQ9ymIIO2JA_Q82bnilqTaw,14660
29
30
  fsspec/implementations/dirfs.py,sha256=8EEgKin5JgFBqzHaKig7ipiFAZJvbChUX_vpC_jagoY,11136
30
31
  fsspec/implementations/ftp.py,sha256=FzcHeieyda-ai_D8w4YKCzvI4gshuFYlBACBuEIx2Nk,11419
31
32
  fsspec/implementations/git.py,sha256=vKGI-Vd5q4H2RrvhebkPc9NwlfkZ980OUGhebeCw-M0,4034
32
33
  fsspec/implementations/github.py,sha256=hCisC1vXzZ9kP1UnyGz2Ba8c9cS2JmSGFHtgHG_2Gqw,7190
33
- fsspec/implementations/http.py,sha256=MI2_MyG4cgvN_abUNvof1NcO80SaNtFS271uvXxbAhk,30042
34
+ fsspec/implementations/http.py,sha256=G3mBiMLDoYddwwzFg5B2zagmDX0r_2jFktrL3BqLU34,29248
34
35
  fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ibN4Pg,3811
35
36
  fsspec/implementations/libarchive.py,sha256=YYZoHefBQItg5lsyJiSy2qPEdUDnfO9IbQWm8dwsGtY,7150
36
37
  fsspec/implementations/local.py,sha256=iqXA60ICWEftzJd6LO2nkMH-vy1AoGe8BD4Wwlw94dw,13291
37
- fsspec/implementations/memory.py,sha256=4llvIMyQre_Pr63HFLOYliEJavRWCpHevMKVHyXzPpw,9698
38
- fsspec/implementations/reference.py,sha256=wtdxuIlGMucNQly8P64ya8qAvqb-5lBlXeyjUDgdvWY,41223
38
+ fsspec/implementations/memory.py,sha256=-a-NR66T-sGj9xTInUsu8KsEiqd156bF8Ui9BuXfmEA,9698
39
+ fsspec/implementations/reference.py,sha256=BHhvx8LIYyBk5OVBWw-PmZsAs_OCaLvF1p8656bwVJE,42438
39
40
  fsspec/implementations/sftp.py,sha256=kZRsE8lwMOSyX-wlVBOGVVrc3cxZBcYbsoX0L28w6sQ,5524
40
- fsspec/implementations/smb.py,sha256=dbLMWboZjSd5pxgxaGRc5qnwNNmZv1AyDzjgNO7OMlM,10615
41
+ fsspec/implementations/smb.py,sha256=k3RtzW97lJtYuw_QpP1rJRFnUBmSsw9twFjUCex0a5U,10591
41
42
  fsspec/implementations/tar.py,sha256=5ZpUp4E3SYbqrwAX2ezvZJqUoZO74Pjb9FpF8o1YBGs,4071
42
- fsspec/implementations/webhdfs.py,sha256=pjRmcQpDR-PLp-dhzXoCqT8oseOpgPr0eiJml1l7a-Y,15322
43
+ fsspec/implementations/webhdfs.py,sha256=C5T96C_p66pUf2cQda-7HIZ9fKYwfCkupf2LN_7n7Dw,16145
43
44
  fsspec/implementations/zip.py,sha256=SJQihbLqUaMvN8kQiLmTOnc6Mdy3lozQp3MxmrJXqJw,4150
44
45
  fsspec/tests/abstract/__init__.py,sha256=i1wcFixV6QhOwdoB24c8oXjzobISNqiKVz9kl2DvAY8,10028
45
- fsspec/tests/abstract/common.py,sha256=X1ijH_pdMc9uVpZtgGj1P-2Zj9VIY-Y0tG3u1vTGcpE,4963
46
+ fsspec/tests/abstract/common.py,sha256=1GQwNo5AONzAnzZj0fWgn8NJPLXALehbsuGxS3FzWVU,4973
46
47
  fsspec/tests/abstract/copy.py,sha256=nyCp1Q9apHzti2_UPDh3HzVhRmV7dciD-3dq-wM7JuU,19643
47
48
  fsspec/tests/abstract/get.py,sha256=vNR4HztvTR7Cj56AMo7_tx7TeYz1Jgr_2Wb8Lv-UiBY,20755
48
49
  fsspec/tests/abstract/put.py,sha256=hEf-yuMWBOT7B6eWcck3tMyJWzdVXtxkY-O6LUt1KAE,20877
49
- fsspec-2023.10.0.dist-info/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
50
- fsspec-2023.10.0.dist-info/METADATA,sha256=5dTihW4k5zH2F6aQqi3VTIVxTWCe5qSbTXsDr-d0MmM,6829
51
- fsspec-2023.10.0.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
52
- fsspec-2023.10.0.dist-info/top_level.txt,sha256=blt2pDrQDwN3Gklcw13CSPLQRd6aaOgJ8AxqrW395MI,7
53
- fsspec-2023.10.0.dist-info/RECORD,,
50
+ fsspec-2023.12.0.dist-info/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
51
+ fsspec-2023.12.0.dist-info/METADATA,sha256=0PDf5Q5_GcLo-r7fJruRt98YZFONP9hX2NbmPfSllrM,6829
52
+ fsspec-2023.12.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
53
+ fsspec-2023.12.0.dist-info/top_level.txt,sha256=blt2pDrQDwN3Gklcw13CSPLQRd6aaOgJ8AxqrW395MI,7
54
+ fsspec-2023.12.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.2)
2
+ Generator: bdist_wheel (0.42.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5