fsspec 2024.5.0__py3-none-any.whl → 2024.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/_version.py +2 -2
- fsspec/caching.py +3 -2
- fsspec/compression.py +1 -1
- fsspec/generic.py +3 -0
- fsspec/implementations/cached.py +6 -16
- fsspec/implementations/dirfs.py +2 -0
- fsspec/implementations/github.py +12 -0
- fsspec/implementations/http.py +2 -1
- fsspec/implementations/reference.py +9 -0
- fsspec/implementations/smb.py +10 -0
- fsspec/json.py +121 -0
- fsspec/registry.py +24 -18
- fsspec/spec.py +119 -33
- fsspec/utils.py +1 -1
- {fsspec-2024.5.0.dist-info → fsspec-2024.6.1.dist-info}/METADATA +10 -5
- fsspec-2024.6.1.dist-info/RECORD +55 -0
- {fsspec-2024.5.0.dist-info → fsspec-2024.6.1.dist-info}/WHEEL +1 -1
- fsspec/implementations/tests/__init__.py +0 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_file_listing.yaml +0 -112
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_mkdir.yaml +0 -582
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_pyarrow_non_partitioned.yaml +0 -873
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range.yaml +0 -458
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range_chunked.yaml +0 -1355
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_and_read.yaml +0 -795
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_pyarrow_non_partitioned.yaml +0 -613
- fsspec/implementations/tests/conftest.py +0 -39
- fsspec/implementations/tests/local/__init__.py +0 -0
- fsspec/implementations/tests/local/local_fixtures.py +0 -18
- fsspec/implementations/tests/local/local_test.py +0 -14
- fsspec/implementations/tests/memory/__init__.py +0 -0
- fsspec/implementations/tests/memory/memory_fixtures.py +0 -27
- fsspec/implementations/tests/memory/memory_test.py +0 -14
- fsspec/implementations/tests/out.zip +0 -0
- fsspec/implementations/tests/test_archive.py +0 -382
- fsspec/implementations/tests/test_arrow.py +0 -259
- fsspec/implementations/tests/test_cached.py +0 -1306
- fsspec/implementations/tests/test_common.py +0 -35
- fsspec/implementations/tests/test_dask.py +0 -29
- fsspec/implementations/tests/test_data.py +0 -20
- fsspec/implementations/tests/test_dbfs.py +0 -268
- fsspec/implementations/tests/test_dirfs.py +0 -588
- fsspec/implementations/tests/test_ftp.py +0 -178
- fsspec/implementations/tests/test_git.py +0 -76
- fsspec/implementations/tests/test_http.py +0 -577
- fsspec/implementations/tests/test_jupyter.py +0 -57
- fsspec/implementations/tests/test_libarchive.py +0 -33
- fsspec/implementations/tests/test_local.py +0 -1285
- fsspec/implementations/tests/test_memory.py +0 -382
- fsspec/implementations/tests/test_reference.py +0 -720
- fsspec/implementations/tests/test_sftp.py +0 -233
- fsspec/implementations/tests/test_smb.py +0 -139
- fsspec/implementations/tests/test_tar.py +0 -243
- fsspec/implementations/tests/test_webhdfs.py +0 -197
- fsspec/implementations/tests/test_zip.py +0 -134
- fsspec/tests/__init__.py +0 -0
- fsspec/tests/conftest.py +0 -188
- fsspec/tests/data/listing.html +0 -1
- fsspec/tests/test_api.py +0 -498
- fsspec/tests/test_async.py +0 -230
- fsspec/tests/test_caches.py +0 -255
- fsspec/tests/test_callbacks.py +0 -89
- fsspec/tests/test_compression.py +0 -164
- fsspec/tests/test_config.py +0 -129
- fsspec/tests/test_core.py +0 -466
- fsspec/tests/test_downstream.py +0 -40
- fsspec/tests/test_file.py +0 -200
- fsspec/tests/test_fuse.py +0 -147
- fsspec/tests/test_generic.py +0 -90
- fsspec/tests/test_gui.py +0 -23
- fsspec/tests/test_mapping.py +0 -228
- fsspec/tests/test_parquet.py +0 -140
- fsspec/tests/test_registry.py +0 -134
- fsspec/tests/test_spec.py +0 -1167
- fsspec/tests/test_utils.py +0 -478
- fsspec-2024.5.0.dist-info/RECORD +0 -111
- {fsspec-2024.5.0.dist-info → fsspec-2024.6.1.dist-info}/licenses/LICENSE +0 -0
fsspec/_version.py
CHANGED
@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '2024.5.0'
-__version_tuple__ = version_tuple = (2024, 5, 0)
+__version__ = version = '2024.6.1'
+__version_tuple__ = version_tuple = (2024, 6, 1)
fsspec/caching.py
CHANGED
@@ -15,6 +15,7 @@ from typing import (
     ClassVar,
     Generic,
     NamedTuple,
+    Optional,
     OrderedDict,
     TypeVar,
 )
@@ -574,7 +575,7 @@ class KnownPartsOfAFile(BaseCache):
         blocksize: int,
         fetcher: Fetcher,
         size: int,
-        data: dict[tuple[int, int], bytes] =
+        data: Optional[dict[tuple[int, int], bytes]] = None,
         strict: bool = True,
         **_: Any,
     ):
@@ -597,7 +598,7 @@ class KnownPartsOfAFile(BaseCache):
 
             self.data = dict(zip(offsets, blocks))
         else:
-            self.data =
+            self.data = {}
 
     def _fetch(self, start: int | None, stop: int | None) -> bytes:
         if start is None:
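Usage sketch (not from the diff; the fetcher below is a dummy placeholder): with the new Optional default, omitting `data` now yields an empty dict rather than relying on a mutable default argument.

    from fsspec.caching import KnownPartsOfAFile

    def fetcher(start, stop):  # placeholder; never called in this sketch
        raise NotImplementedError

    cache = KnownPartsOfAFile(blocksize=0, fetcher=fetcher, size=0)
    assert cache.data == {}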
fsspec/compression.py
CHANGED
@@ -139,7 +139,7 @@ class SnappyFile(AbstractBufferedFile):
 try:
     import snappy
 
-    snappy.compress
+    snappy.compress(b"")
     # Snappy may use the .sz file extension, but this is not part of the
     # standard implementation.
     register_compression("snappy", SnappyFile, [])
fsspec/generic.py
CHANGED
@@ -197,6 +197,7 @@ class GenericFileSystem(AsyncFileSystem):
         )
         result = {}
         for k, v in out.items():
+            v = v.copy()  # don't corrupt target FS dircache
             name = fs.unstrip_protocol(k)
             v["name"] = name
             result[name] = v
@@ -210,6 +211,7 @@ class GenericFileSystem(AsyncFileSystem):
             out = await fs._info(url, **kwargs)
         else:
             out = fs.info(url, **kwargs)
+        out = out.copy()  # don't edit originals
         out["name"] = fs.unstrip_protocol(out["name"])
         return out
 
@@ -224,6 +226,7 @@ class GenericFileSystem(AsyncFileSystem):
             out = await fs._ls(url, detail=True, **kwargs)
         else:
             out = fs.ls(url, detail=True, **kwargs)
+        out = [o.copy() for o in out]  # don't edit originals
         for o in out:
             o["name"] = fs.unstrip_protocol(o["name"])
         if detail:
fsspec/implementations/cached.py
CHANGED
@@ -425,7 +425,6 @@ class CachingFileSystem(AbstractFileSystem):
             "clear_cache",
             "clear_expired_cache",
             "pop_from_cache",
-            "_mkcache",
             "local_file",
             "_paths_from_path",
             "get_mapper",
@@ -435,12 +434,10 @@ class CachingFileSystem(AbstractFileSystem):
             "__hash__",
             "__eq__",
             "to_json",
+            "to_dict",
             "cache_size",
             "pipe_file",
             "pipe",
-            "isdir",
-            "isfile",
-            "exists",
             "start_transaction",
             "end_transaction",
         }:
@@ -510,15 +507,6 @@ class CachingFileSystem(AbstractFileSystem):
             ^ hash(self.target_protocol)
         )
 
-    def to_json(self):
-        """Calculate JSON representation.
-
-        Not implemented yet for CachingFileSystem.
-        """
-        raise NotImplementedError(
-            "CachingFileSystem JSON representation not implemented"
-        )
-
 
 class WholeFileCacheFileSystem(CachingFileSystem):
     """Caches whole remote files on first access
@@ -663,7 +651,8 @@ class WholeFileCacheFileSystem(CachingFileSystem):
     def _open(self, path, mode="rb", **kwargs):
         path = self._strip_protocol(path)
         if "r" not in mode:
-
+            hash = self._mapper(path)
+            fn = os.path.join(self.storage[-1], hash)
             user_specified_kwargs = {
                 k: v
                 for k, v in kwargs.items()
@@ -806,7 +795,8 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
         if self._intrans:
             f = [_ for _ in self.transaction.files if _.path == path]
             if f:
-
+                size = os.path.getsize(f[0].fn) if f[0].closed else f[0].tell()
+                return {"name": path, "size": size, "type": "file"}
         f = any(_.path.startswith(path + "/") for _ in self.transaction.files)
         if f:
             return {"name": path, "size": 0, "type": "directory"}
@@ -912,7 +902,7 @@ class LocalTempFile:
         self.close()
 
     def close(self):
-        self.size = self.fh.tell()
+        # self.size = self.fh.tell()
         if self.closed:
             return
         self.fh.close()
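Usage sketch of the practical effect (the protocol and cache path are placeholders, not from the diff): with the NotImplementedError override removed, caching filesystems serialize through the same machinery as any other instance.

    import fsspec
    from fsspec import AbstractFileSystem

    fs = fsspec.filesystem(
        "simplecache", target_protocol="memory", cache_storage="/tmp/fsspec-cache"
    )
    blob = fs.to_json()                      # previously raised NotImplementedError
    fs2 = AbstractFileSystem.from_json(blob)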
fsspec/implementations/dirfs.py
CHANGED
@@ -56,6 +56,8 @@ class DirFileSystem(AsyncFileSystem):
             if not path:
                 return self.path
             return self.fs.sep.join((self.path, self._strip_protocol(path)))
+        if isinstance(path, dict):
+            return {self._join(_path): value for _path, value in path.items()}
         return [self._join(_path) for _path in path]
 
     def _relpath(self, path):
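Usage sketch (assumed setup, not from the diff): the new dict branch means methods that accept a {path: bytes} mapping, such as pipe, now have their keys prefixed with the directory root as well.

    import fsspec

    fs = fsspec.filesystem("dir", path="root", fs=fsspec.filesystem("memory"))
    fs.pipe({"a.txt": b"one", "b.txt": b"two"})  # stored under root/a.txt, root/b.txt
    assert fs.cat("a.txt") == b"one"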
fsspec/implementations/github.py
CHANGED
@@ -1,5 +1,7 @@
 import requests
 
+import fsspec
+
 from ..spec import AbstractFileSystem
 from ..utils import infer_storage_options
 from .memory import MemoryFile
@@ -225,3 +227,13 @@ class GithubFileSystem(AbstractFileSystem):
             raise FileNotFoundError(path)
         r.raise_for_status()
         return MemoryFile(None, None, r.content)
+
+    def cat(self, path, recursive=False, on_error="raise", **kwargs):
+        paths = self.expand_path(path, recursive=recursive)
+        urls = [
+            self.rurl.format(org=self.org, repo=self.repo, path=u, sha=self.root)
+            for u, sh in paths
+        ]
+        fs = fsspec.filesystem("http")
+        data = fs.cat(urls, on_error="return")
+        return {u: v for ((k, v), u) in zip(data.items(), urls)}
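Usage sketch (needs network access; the org/repo names are only examples): the new cat resolves raw-content URLs for the requested paths and fetches them in a single HTTP batch.

    import fsspec

    fs = fsspec.filesystem("github", org="fsspec", repo="filesystem_spec")
    out = fs.cat(["README.md", "LICENSE"])  # dict of fetched contents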
fsspec/implementations/http.py
CHANGED
@@ -560,6 +560,7 @@ class HTTPFile(AbstractBufferedFile):
         if mode != "rb":
             raise NotImplementedError("File mode not supported")
         self.asynchronous = asynchronous
+        self.loop = loop
         self.url = url
         self.session = session
         self.details = {"name": url, "size": size, "type": "file"}
@@ -572,7 +573,6 @@ class HTTPFile(AbstractBufferedFile):
             cache_options=cache_options,
             **kwargs,
         )
-        self.loop = loop
 
     def read(self, length=-1):
         """Read bytes from file
@@ -736,6 +736,7 @@ class HTTPStreamFile(AbstractBufferedFile):
             return r
 
         self.r = sync(self.loop, cor)
+        self.loop = fs.loop
 
     def seek(self, loc, whence=0):
         if loc == 0 and whence == 1:

fsspec/implementations/reference.py
CHANGED
@@ -935,6 +935,13 @@ class ReferenceFileSystem(AsyncFileSystem):
 
     def _process_references0(self, references):
         """Make reference dict for Spec Version 0"""
+        if isinstance(references, dict):
+            # do not do this for lazy/parquet backend, which will not make dicts,
+            # but must remain writable in the original object
+            references = {
+                key: json.dumps(val) if isinstance(val, dict) else val
+                for key, val in references.items()
+            }
         self.references = references
 
     def _process_references1(self, references, template_overrides=None):
@@ -952,6 +959,8 @@ class ReferenceFileSystem(AsyncFileSystem):
                 if v.startswith("base64:"):
                     self.references[k] = base64.b64decode(v[7:])
                 self.references[k] = v
+            elif isinstance(v, dict):
+                self.references[k] = json.dumps(v)
             elif self.templates:
                 u = v[0]
                 if "{{" in u:
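Usage sketch (in-memory references only, no remote storage touched): inline dict values, such as zarr metadata, are now normalised to JSON strings when the references are processed.

    import fsspec

    refs = {"version": 1, "refs": {".zgroup": {"zarr_format": 2}}}
    fs = fsspec.filesystem("reference", fo=refs)
    fs.cat(".zgroup")  # b'{"zarr_format": 2}'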
fsspec/implementations/smb.py
CHANGED
@@ -68,6 +68,7 @@ class SMBFileSystem(AbstractFileSystem):
         encrypt=None,
         share_access=None,
         register_session_retries=5,
+        auto_mkdir=False,
         **kwargs,
     ):
         """
@@ -102,6 +103,10 @@ class SMBFileSystem(AbstractFileSystem):
             - 'r': Allow other handles to be opened with read access.
             - 'w': Allow other handles to be opened with write access.
             - 'd': Allow other handles to be opened with delete access.
+        auto_mkdir: bool
+            Whether, when opening a file, the directory containing it should
+            be created (if it doesn't already exist). This is assumed by pyarrow
+            and zarr-python code.
         """
         super().__init__(**kwargs)
         self.host = host
@@ -113,6 +118,7 @@ class SMBFileSystem(AbstractFileSystem):
         self.temppath = kwargs.pop("temppath", "")
         self.share_access = share_access
         self.register_session_retries = register_session_retries
+        self.auto_mkdir = auto_mkdir
         self._connect()
 
     @property
@@ -224,6 +230,8 @@ class SMBFileSystem(AbstractFileSystem):
         By specifying 'share_access' in 'kwargs' it is possible to override the
         default shared access setting applied in the constructor of this object.
         """
+        if self.auto_mkdir and "w" in mode:
+            self.makedirs(self._parent(path), exist_ok=True)
         bls = block_size if block_size is not None and block_size >= 0 else -1
         wpath = _as_unc_path(self.host, path)
         share_access = kwargs.pop("share_access", self.share_access)
@@ -245,6 +253,8 @@ class SMBFileSystem(AbstractFileSystem):
         """Copy within two locations in the same filesystem"""
         wpath1 = _as_unc_path(self.host, path1)
         wpath2 = _as_unc_path(self.host, path2)
+        if self.auto_mkdir:
+            self.makedirs(self._parent(path2), exist_ok=True)
         smbclient.copyfile(wpath1, wpath2, port=self._port, **kwargs)
 
     def _rm(self, path):
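Usage sketch (server details are placeholders): with auto_mkdir=True the parent directory is created before a file is opened for writing, which is what pyarrow and zarr-python assume.

    import fsspec

    fs = fsspec.filesystem(
        "smb", host="server", username="user", password="secret", auto_mkdir=True
    )
    with fs.open("/share/new/nested/dir/file.bin", "wb") as f:
        f.write(b"data")  # intermediate directories are created first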
fsspec/json.py
ADDED
@@ -0,0 +1,121 @@
+import json
+from contextlib import suppress
+from pathlib import PurePath
+from typing import (
+    Any,
+    Callable,
+    ClassVar,
+    Dict,
+    List,
+    Mapping,
+    Optional,
+    Sequence,
+    Tuple,
+)
+
+from .registry import _import_class, get_filesystem_class
+from .spec import AbstractFileSystem
+
+
+class FilesystemJSONEncoder(json.JSONEncoder):
+    include_password: ClassVar[bool] = True
+
+    def default(self, o: Any) -> Any:
+        if isinstance(o, AbstractFileSystem):
+            return o.to_dict(include_password=self.include_password)
+        if isinstance(o, PurePath):
+            cls = type(o)
+            return {"cls": f"{cls.__module__}.{cls.__name__}", "str": str(o)}
+
+        return super().default(o)
+
+    def make_serializable(self, obj: Any) -> Any:
+        """
+        Recursively converts an object so that it can be JSON serialized via
+        :func:`json.dumps` and :func:`json.dump`, without actually calling
+        said functions.
+        """
+        if isinstance(obj, (str, int, float, bool)):
+            return obj
+        if isinstance(obj, Mapping):
+            return {k: self.make_serializable(v) for k, v in obj.items()}
+        if isinstance(obj, Sequence):
+            return [self.make_serializable(v) for v in obj]
+
+        return self.default(obj)
+
+
+class FilesystemJSONDecoder(json.JSONDecoder):
+    def __init__(
+        self,
+        *,
+        object_hook: Optional[Callable[[Dict[str, Any]], Any]] = None,
+        parse_float: Optional[Callable[[str], Any]] = None,
+        parse_int: Optional[Callable[[str], Any]] = None,
+        parse_constant: Optional[Callable[[str], Any]] = None,
+        strict: bool = True,
+        object_pairs_hook: Optional[Callable[[List[Tuple[str, Any]]], Any]] = None,
+    ) -> None:
+        self.original_object_hook = object_hook
+
+        super().__init__(
+            object_hook=self.custom_object_hook,
+            parse_float=parse_float,
+            parse_int=parse_int,
+            parse_constant=parse_constant,
+            strict=strict,
+            object_pairs_hook=object_pairs_hook,
+        )
+
+    @classmethod
+    def try_resolve_path_cls(cls, dct: Dict[str, Any]):
+        with suppress(Exception):
+            fqp = dct["cls"]
+
+            path_cls = _import_class(fqp)
+
+            if issubclass(path_cls, PurePath):
+                return path_cls
+
+        return None
+
+    @classmethod
+    def try_resolve_fs_cls(cls, dct: Dict[str, Any]):
+        with suppress(Exception):
+            if "cls" in dct:
+                try:
+                    fs_cls = _import_class(dct["cls"])
+                    if issubclass(fs_cls, AbstractFileSystem):
+                        return fs_cls
+                except Exception:
+                    if "protocol" in dct:  # Fallback if cls cannot be imported
+                        return get_filesystem_class(dct["protocol"])
+
+                    raise
+
+        return None
+
+    def custom_object_hook(self, dct: Dict[str, Any]):
+        if "cls" in dct:
+            if (obj_cls := self.try_resolve_fs_cls(dct)) is not None:
+                return AbstractFileSystem.from_dict(dct)
+            if (obj_cls := self.try_resolve_path_cls(dct)) is not None:
+                return obj_cls(dct["str"])
+
+        if self.original_object_hook is not None:
+            return self.original_object_hook(dct)
+
+        return dct
+
+    def unmake_serializable(self, obj: Any) -> Any:
+        """
+        Inverse function of :meth:`FilesystemJSONEncoder.make_serializable`.
+        """
+        if isinstance(obj, dict):
+            obj = self.custom_object_hook(obj)
+        if isinstance(obj, dict):
+            return {k: self.unmake_serializable(v) for k, v in obj.items()}
+        if isinstance(obj, (list, tuple)):
+            return [self.unmake_serializable(v) for v in obj]
+
+        return obj
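Usage sketch of the new module with the standard json API (a memory filesystem stands in for a real one):

    import json

    import fsspec
    from fsspec.json import FilesystemJSONDecoder, FilesystemJSONEncoder

    fs = fsspec.filesystem("memory")
    blob = json.dumps({"fs": fs}, cls=FilesystemJSONEncoder)
    restored = json.loads(blob, cls=FilesystemJSONDecoder)
    assert isinstance(restored["fs"], type(fs))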
fsspec/registry.py
CHANGED
@@ -257,27 +257,33 @@ update the current installation.
 """
 
 
-def _import_class(
-    """Take a
+def _import_class(fqp: str):
+    """Take a fully-qualified path and return the imported class or identifier.
 
-
+    ``fqp`` is of the form "package.module.klass" or
+    "package.module:subobject.klass".
+
+    Warnings
+    --------
+    This can import arbitrary modules. Make sure you haven't installed any modules
+    that may execute malicious code at import time.
     """
-    if ":" in
-        mod, name =
-        s3 = mod == "s3fs"
-        mod = importlib.import_module(mod)
-        if s3 and mod.__version__.split(".") < ["0", "5"]:
-            warnings.warn(s3_msg)
-        for part in name.split("."):
-            mod = getattr(mod, part)
-        return mod
+    if ":" in fqp:
+        mod, name = fqp.rsplit(":", 1)
     else:
-        mod, name =
-
-
-
-
-
+        mod, name = fqp.rsplit(".", 1)
+
+    is_s3 = mod == "s3fs"
+    mod = importlib.import_module(mod)
+    if is_s3 and mod.__version__.split(".") < ["0", "5"]:
+        warnings.warn(s3_msg)
+    for part in name.split("."):
+        mod = getattr(mod, part)
+
+    if not isinstance(mod, type):
+        raise TypeError(f"{fqp} is not a class")
+
+    return mod
 
 
 def filesystem(protocol, **storage_options):
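Usage sketch (private helper, shown only to illustrate the two accepted path forms):

    from fsspec.registry import _import_class

    a = _import_class("fsspec.implementations.memory.MemoryFileSystem")
    b = _import_class("fsspec.implementations.memory:MemoryFileSystem")
    assert a is b  # both forms resolve to the same class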
fsspec/spec.py
CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import io
+import json
 import logging
 import os
 import threading
@@ -9,7 +10,7 @@ import weakref
 from errno import ESPIPE
 from glob import has_magic
 from hashlib import sha256
-from typing import ClassVar
+from typing import Any, ClassVar, Dict, Tuple
 
 from .callbacks import DEFAULT_CALLBACK
 from .config import apply_config, conf
@@ -115,6 +116,10 @@ class AbstractFileSystem(metaclass=_Cached):
     #: Extra *class attributes* that should be considered when hashing.
     _extra_tokenize_attributes = ()
 
+    # Set by _Cached metaclass
+    storage_args: Tuple[Any, ...]
+    storage_options: Dict[str, Any]
+
     def __init__(self, *args, **storage_options):
         """Create and configure file-system instance
 
@@ -1381,41 +1386,45 @@ class AbstractFileSystem(metaclass=_Cached):
             length = size - offset
         return read_block(f, offset, length, delimiter)
 
-    def to_json(self):
+    def to_json(self, *, include_password: bool = True) -> str:
         """
-        JSON representation of this filesystem instance
+        JSON representation of this filesystem instance.
+
+        Parameters
+        ----------
+        include_password: bool, default True
+            Whether to include the password (if any) in the output.
 
         Returns
         -------
-
-
-
-
+        JSON string with keys ``cls`` (the python location of this class),
+        protocol (text name of this class's protocol, first one in case of
+        multiple), ``args`` (positional args, usually empty), and all other
+        keyword arguments as their own keys.
+
+        Warnings
+        --------
+        Serialized filesystems may contain sensitive information which have been
+        passed to the constructor, such as passwords and tokens. Make sure you
+        store and send them in a secure environment!
         """
-        import
+        from .json import FilesystemJSONEncoder
 
-        cls = type(self)
-        cls = ".".join((cls.__module__, cls.__name__))
-        proto = (
-            self.protocol[0]
-            if isinstance(self.protocol, (tuple, list))
-            else self.protocol
-        )
         return json.dumps(
-
-
-
-
-
-        )
+            self,
+            cls=type(
+                "_FilesystemJSONEncoder",
+                (FilesystemJSONEncoder,),
+                {"include_password": include_password},
+            ),
         )
 
     @staticmethod
-    def from_json(blob):
+    def from_json(blob: str) -> AbstractFileSystem:
         """
-        Recreate a filesystem instance from JSON representation
+        Recreate a filesystem instance from JSON representation.
 
-        See ``.to_json()`` for the expected structure of the input
+        See ``.to_json()`` for the expected structure of the input.
 
         Parameters
         ----------
@@ -1424,18 +1433,95 @@ class AbstractFileSystem(metaclass=_Cached):
         Returns
         -------
         file system instance, not necessarily of this particular class.
+
+        Warnings
+        --------
+        This can import arbitrary modules (as determined by the ``cls`` key).
+        Make sure you haven't installed any modules that may execute malicious code
+        at import time.
         """
-        import
+        from .json import FilesystemJSONDecoder
 
-
+        return json.loads(blob, cls=FilesystemJSONDecoder)
 
-
-
-
-
-
-
-
+    def to_dict(self, *, include_password: bool = True) -> Dict[str, Any]:
+        """
+        JSON-serializable dictionary representation of this filesystem instance.
+
+        Parameters
+        ----------
+        include_password: bool, default True
+            Whether to include the password (if any) in the output.
+
+        Returns
+        -------
+        Dictionary with keys ``cls`` (the python location of this class),
+        protocol (text name of this class's protocol, first one in case of
+        multiple), ``args`` (positional args, usually empty), and all other
+        keyword arguments as their own keys.
+
+        Warnings
+        --------
+        Serialized filesystems may contain sensitive information which have been
+        passed to the constructor, such as passwords and tokens. Make sure you
+        store and send them in a secure environment!
+        """
+        from .json import FilesystemJSONEncoder
+
+        json_encoder = FilesystemJSONEncoder()
+
+        cls = type(self)
+        proto = self.protocol
+
+        storage_options = dict(self.storage_options)
+        if not include_password:
+            storage_options.pop("password", None)
+
+        return dict(
+            cls=f"{cls.__module__}:{cls.__name__}",
+            protocol=proto[0] if isinstance(proto, (tuple, list)) else proto,
+            args=json_encoder.make_serializable(self.storage_args),
+            **json_encoder.make_serializable(storage_options),
+        )
+
+    @staticmethod
+    def from_dict(dct: Dict[str, Any]) -> AbstractFileSystem:
+        """
+        Recreate a filesystem instance from dictionary representation.
+
+        See ``.to_dict()`` for the expected structure of the input.
+
+        Parameters
+        ----------
+        dct: Dict[str, Any]
+
+        Returns
+        -------
+        file system instance, not necessarily of this particular class.
+
+        Warnings
+        --------
+        This can import arbitrary modules (as determined by the ``cls`` key).
+        Make sure you haven't installed any modules that may execute malicious code
+        at import time.
+        """
+        from .json import FilesystemJSONDecoder
+
+        json_decoder = FilesystemJSONDecoder()
+
+        dct = dict(dct)  # Defensive copy
+
+        cls = FilesystemJSONDecoder.try_resolve_fs_cls(dct)
+        if cls is None:
+            raise ValueError("Not a serialized AbstractFileSystem")
+
+        dct.pop("cls", None)
+        dct.pop("protocol", None)
+
+        return cls(
+            *json_decoder.unmake_serializable(dct.pop("args", ())),
+            **json_decoder.unmake_serializable(dct),
+        )
 
     def _get_pyarrow_filesystem(self):
         """
fsspec/utils.py
CHANGED
@@ -435,7 +435,7 @@ def isfilelike(f: Any) -> TypeGuard[IO[bytes]]:
 
 def get_protocol(url: str) -> str:
     url = stringify_path(url)
-    parts = re.split(r"(\:\:|\://)", url, 1)
+    parts = re.split(r"(\:\:|\://)", url, maxsplit=1)
     if len(parts) > 1:
         return parts[0]
     return "file"