fsspec 2023.9.2__py3-none-any.whl → 2023.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/__init__.py +6 -1
- fsspec/_version.py +4 -4
- fsspec/archive.py +1 -1
- fsspec/asyn.py +35 -45
- fsspec/caching.py +161 -90
- fsspec/compression.py +2 -4
- fsspec/core.py +19 -6
- fsspec/fuse.py +2 -2
- fsspec/generic.py +5 -1
- fsspec/gui.py +4 -4
- fsspec/implementations/cached.py +105 -25
- fsspec/implementations/data.py +48 -0
- fsspec/implementations/ftp.py +6 -6
- fsspec/implementations/git.py +3 -3
- fsspec/implementations/github.py +3 -7
- fsspec/implementations/http.py +34 -47
- fsspec/implementations/jupyter.py +5 -5
- fsspec/implementations/libarchive.py +1 -2
- fsspec/implementations/local.py +8 -4
- fsspec/implementations/memory.py +1 -1
- fsspec/implementations/reference.py +67 -25
- fsspec/implementations/sftp.py +11 -11
- fsspec/implementations/smb.py +4 -5
- fsspec/implementations/webhdfs.py +28 -8
- fsspec/implementations/zip.py +2 -2
- fsspec/mapping.py +2 -2
- fsspec/registry.py +8 -6
- fsspec/spec.py +41 -55
- fsspec/tests/abstract/common.py +5 -5
- fsspec/transaction.py +8 -4
- fsspec/utils.py +204 -37
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/METADATA +7 -6
- fsspec-2023.12.0.dist-info/RECORD +54 -0
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/WHEEL +1 -1
- fsspec/implementations/http_sync.py +0 -882
- fsspec-2023.9.2.dist-info/RECORD +0 -54
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/LICENSE +0 -0
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/top_level.txt +0 -0
fsspec/__init__.py
CHANGED
|
@@ -48,10 +48,15 @@ def process_entries():
|
|
|
48
48
|
specs = eps.select(group="fsspec.specs")
|
|
49
49
|
else:
|
|
50
50
|
specs = eps.get("fsspec.specs", [])
|
|
51
|
+
registered_names = {}
|
|
51
52
|
for spec in specs:
|
|
52
53
|
err_msg = f"Unable to load filesystem from {spec}"
|
|
54
|
+
name = spec.name
|
|
55
|
+
if name in registered_names:
|
|
56
|
+
continue
|
|
57
|
+
registered_names[name] = True
|
|
53
58
|
register_implementation(
|
|
54
|
-
|
|
59
|
+
name,
|
|
55
60
|
spec.value.replace(":", "."),
|
|
56
61
|
errtxt=err_msg,
|
|
57
62
|
# We take our implementations as the ones to overload with if
|
fsspec/_version.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
# This file was generated by 'versioneer.py' (0.
|
|
2
|
+
# This file was generated by 'versioneer.py' (0.29) from
|
|
3
3
|
# revision-control system data, or from the parent directory name of an
|
|
4
4
|
# unpacked source archive. Distribution tarballs contain a pre-generated copy
|
|
5
5
|
# of this file.
|
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2023-
|
|
11
|
+
"date": "2023-12-02T20:51:30-0500",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "2023.
|
|
14
|
+
"full-revisionid": "5cf9cd952c5d276835d3caef9c32fcf69d55b10c",
|
|
15
|
+
"version": "2023.12.0"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
fsspec/archive.py
CHANGED
|
@@ -13,7 +13,7 @@ class AbstractArchiveFileSystem(AbstractFileSystem):
|
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
15
|
def __str__(self):
|
|
16
|
-
return "<Archive-like object
|
|
16
|
+
return f"<Archive-like object {type(self).__name__} at {id(self)}>"
|
|
17
17
|
|
|
18
18
|
__repr__ = __str__
|
|
19
19
|
|
fsspec/asyn.py
CHANGED
|
@@ -15,7 +15,7 @@ from .callbacks import _DEFAULT_CALLBACK
|
|
|
15
15
|
from .exceptions import FSTimeoutError
|
|
16
16
|
from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep
|
|
17
17
|
from .spec import AbstractBufferedFile, AbstractFileSystem
|
|
18
|
-
from .utils import is_exception, other_paths
|
|
18
|
+
from .utils import glob_translate, is_exception, other_paths
|
|
19
19
|
|
|
20
20
|
private = re.compile("_[^_]")
|
|
21
21
|
iothread = [None] # dedicated fsspec IO thread
|
|
@@ -106,7 +106,7 @@ def sync(loop, func, *args, timeout=None, **kwargs):
|
|
|
106
106
|
|
|
107
107
|
|
|
108
108
|
def sync_wrapper(func, obj=None):
|
|
109
|
-
"""Given a function, make so can be called in
|
|
109
|
+
"""Given a function, make so can be called in blocking contexts
|
|
110
110
|
|
|
111
111
|
Leave obj=None if defining within a class. Pass the instance if attaching
|
|
112
112
|
as an attribute of the instance.
|
|
@@ -426,7 +426,7 @@ class AsyncFileSystem(AbstractFileSystem):
|
|
|
426
426
|
end = ""
|
|
427
427
|
if isinstance(end, numbers.Integral):
|
|
428
428
|
end -= 1 # bytes range is inclusive
|
|
429
|
-
return "bytes
|
|
429
|
+
return f"bytes={start}-{end}"
|
|
430
430
|
|
|
431
431
|
async def _cat_file(self, path, start=None, end=None, **kwargs):
|
|
432
432
|
raise NotImplementedError
|
|
@@ -467,6 +467,16 @@ class AsyncFileSystem(AbstractFileSystem):
|
|
|
467
467
|
on_error="return",
|
|
468
468
|
**kwargs,
|
|
469
469
|
):
|
|
470
|
+
"""Get the contents of byte ranges from one or more files
|
|
471
|
+
|
|
472
|
+
Parameters
|
|
473
|
+
----------
|
|
474
|
+
paths: list
|
|
475
|
+
A list of filepaths on this filesystem
|
|
476
|
+
starts, ends: int or list
|
|
477
|
+
Bytes limits of the read. If using a single int, the same value will be
|
|
478
|
+
used to read all the specified files.
|
|
479
|
+
"""
|
|
470
480
|
# TODO: on_error
|
|
471
481
|
if max_gap is not None:
|
|
472
482
|
# use utils.merge_offset_ranges
|
|
@@ -476,7 +486,7 @@ class AsyncFileSystem(AbstractFileSystem):
|
|
|
476
486
|
if not isinstance(starts, Iterable):
|
|
477
487
|
starts = [starts] * len(paths)
|
|
478
488
|
if not isinstance(ends, Iterable):
|
|
479
|
-
ends = [
|
|
489
|
+
ends = [ends] * len(paths)
|
|
480
490
|
if len(starts) != len(paths) or len(ends) != len(paths):
|
|
481
491
|
raise ValueError
|
|
482
492
|
coros = [
|
|
@@ -662,9 +672,9 @@ class AsyncFileSystem(AbstractFileSystem):
|
|
|
662
672
|
[self._size(p) for p in paths], batch_size=batch_size
|
|
663
673
|
)
|
|
664
674
|
|
|
665
|
-
async def _exists(self, path):
|
|
675
|
+
async def _exists(self, path, **kwargs):
|
|
666
676
|
try:
|
|
667
|
-
await self._info(path)
|
|
677
|
+
await self._info(path, **kwargs)
|
|
668
678
|
return True
|
|
669
679
|
except FileNotFoundError:
|
|
670
680
|
return False
|
|
@@ -735,8 +745,12 @@ class AsyncFileSystem(AbstractFileSystem):
|
|
|
735
745
|
|
|
736
746
|
import re
|
|
737
747
|
|
|
738
|
-
|
|
748
|
+
seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
|
|
749
|
+
ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash
|
|
739
750
|
path = self._strip_protocol(path)
|
|
751
|
+
append_slash_to_dirname = ends_with_sep or path.endswith(
|
|
752
|
+
tuple(sep + "**" for sep in seps)
|
|
753
|
+
)
|
|
740
754
|
idx_star = path.find("*") if path.find("*") >= 0 else len(path)
|
|
741
755
|
idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
|
|
742
756
|
idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
|
|
@@ -746,11 +760,11 @@ class AsyncFileSystem(AbstractFileSystem):
|
|
|
746
760
|
detail = kwargs.pop("detail", False)
|
|
747
761
|
|
|
748
762
|
if not has_magic(path):
|
|
749
|
-
if await self._exists(path):
|
|
763
|
+
if await self._exists(path, **kwargs):
|
|
750
764
|
if not detail:
|
|
751
765
|
return [path]
|
|
752
766
|
else:
|
|
753
|
-
return {path: await self._info(path)}
|
|
767
|
+
return {path: await self._info(path, **kwargs)}
|
|
754
768
|
else:
|
|
755
769
|
if not detail:
|
|
756
770
|
return [] # glob of non-existent returns empty
|
|
@@ -775,46 +789,22 @@ class AsyncFileSystem(AbstractFileSystem):
|
|
|
775
789
|
allpaths = await self._find(
|
|
776
790
|
root, maxdepth=depth, withdirs=True, detail=True, **kwargs
|
|
777
791
|
)
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
# See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
|
|
781
|
-
# for shell globbing details.
|
|
782
|
-
pattern = (
|
|
783
|
-
"^"
|
|
784
|
-
+ (
|
|
785
|
-
path.replace("\\", r"\\")
|
|
786
|
-
.replace(".", r"\.")
|
|
787
|
-
.replace("+", r"\+")
|
|
788
|
-
.replace("//", "/")
|
|
789
|
-
.replace("(", r"\(")
|
|
790
|
-
.replace(")", r"\)")
|
|
791
|
-
.replace("|", r"\|")
|
|
792
|
-
.replace("^", r"\^")
|
|
793
|
-
.replace("$", r"\$")
|
|
794
|
-
.replace("{", r"\{")
|
|
795
|
-
.replace("}", r"\}")
|
|
796
|
-
.rstrip("/")
|
|
797
|
-
.replace("?", ".")
|
|
798
|
-
)
|
|
799
|
-
+ "$"
|
|
800
|
-
)
|
|
801
|
-
pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern)
|
|
802
|
-
pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern)
|
|
803
|
-
pattern = re.sub("[*]", "[^/]*", pattern)
|
|
804
|
-
pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern)
|
|
805
|
-
pattern = re.sub("=DOUBLE_STARS=", ".*", pattern)
|
|
792
|
+
|
|
793
|
+
pattern = glob_translate(path + ("/" if ends_with_sep else ""))
|
|
806
794
|
pattern = re.compile(pattern)
|
|
795
|
+
|
|
807
796
|
out = {
|
|
808
|
-
p:
|
|
809
|
-
for p in sorted(allpaths)
|
|
810
|
-
if pattern.match(
|
|
797
|
+
p: info
|
|
798
|
+
for p, info in sorted(allpaths.items())
|
|
799
|
+
if pattern.match(
|
|
800
|
+
(
|
|
801
|
+
p + "/"
|
|
802
|
+
if append_slash_to_dirname and info["type"] == "directory"
|
|
803
|
+
else p
|
|
804
|
+
)
|
|
805
|
+
)
|
|
811
806
|
}
|
|
812
807
|
|
|
813
|
-
# Return directories only when the glob end by a slash
|
|
814
|
-
# This is needed for posix glob compliance
|
|
815
|
-
if ends:
|
|
816
|
-
out = {k: v for k, v in out.items() if v["type"] == "directory"}
|
|
817
|
-
|
|
818
808
|
if detail:
|
|
819
809
|
return out
|
|
820
810
|
else:
|