fsspec 2024.5.0__py3-none-any.whl → 2024.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76):
  1. fsspec/_version.py +2 -2
  2. fsspec/caching.py +3 -2
  3. fsspec/compression.py +1 -1
  4. fsspec/generic.py +3 -0
  5. fsspec/implementations/cached.py +6 -16
  6. fsspec/implementations/dirfs.py +2 -0
  7. fsspec/implementations/github.py +12 -0
  8. fsspec/implementations/http.py +2 -1
  9. fsspec/implementations/reference.py +9 -0
  10. fsspec/implementations/smb.py +10 -0
  11. fsspec/json.py +121 -0
  12. fsspec/registry.py +24 -18
  13. fsspec/spec.py +119 -33
  14. fsspec/utils.py +1 -1
  15. {fsspec-2024.5.0.dist-info → fsspec-2024.6.1.dist-info}/METADATA +10 -5
  16. fsspec-2024.6.1.dist-info/RECORD +55 -0
  17. {fsspec-2024.5.0.dist-info → fsspec-2024.6.1.dist-info}/WHEEL +1 -1
  18. fsspec/implementations/tests/__init__.py +0 -0
  19. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_file_listing.yaml +0 -112
  20. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_mkdir.yaml +0 -582
  21. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_pyarrow_non_partitioned.yaml +0 -873
  22. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range.yaml +0 -458
  23. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range_chunked.yaml +0 -1355
  24. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_and_read.yaml +0 -795
  25. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_pyarrow_non_partitioned.yaml +0 -613
  26. fsspec/implementations/tests/conftest.py +0 -39
  27. fsspec/implementations/tests/local/__init__.py +0 -0
  28. fsspec/implementations/tests/local/local_fixtures.py +0 -18
  29. fsspec/implementations/tests/local/local_test.py +0 -14
  30. fsspec/implementations/tests/memory/__init__.py +0 -0
  31. fsspec/implementations/tests/memory/memory_fixtures.py +0 -27
  32. fsspec/implementations/tests/memory/memory_test.py +0 -14
  33. fsspec/implementations/tests/out.zip +0 -0
  34. fsspec/implementations/tests/test_archive.py +0 -382
  35. fsspec/implementations/tests/test_arrow.py +0 -259
  36. fsspec/implementations/tests/test_cached.py +0 -1306
  37. fsspec/implementations/tests/test_common.py +0 -35
  38. fsspec/implementations/tests/test_dask.py +0 -29
  39. fsspec/implementations/tests/test_data.py +0 -20
  40. fsspec/implementations/tests/test_dbfs.py +0 -268
  41. fsspec/implementations/tests/test_dirfs.py +0 -588
  42. fsspec/implementations/tests/test_ftp.py +0 -178
  43. fsspec/implementations/tests/test_git.py +0 -76
  44. fsspec/implementations/tests/test_http.py +0 -577
  45. fsspec/implementations/tests/test_jupyter.py +0 -57
  46. fsspec/implementations/tests/test_libarchive.py +0 -33
  47. fsspec/implementations/tests/test_local.py +0 -1285
  48. fsspec/implementations/tests/test_memory.py +0 -382
  49. fsspec/implementations/tests/test_reference.py +0 -720
  50. fsspec/implementations/tests/test_sftp.py +0 -233
  51. fsspec/implementations/tests/test_smb.py +0 -139
  52. fsspec/implementations/tests/test_tar.py +0 -243
  53. fsspec/implementations/tests/test_webhdfs.py +0 -197
  54. fsspec/implementations/tests/test_zip.py +0 -134
  55. fsspec/tests/__init__.py +0 -0
  56. fsspec/tests/conftest.py +0 -188
  57. fsspec/tests/data/listing.html +0 -1
  58. fsspec/tests/test_api.py +0 -498
  59. fsspec/tests/test_async.py +0 -230
  60. fsspec/tests/test_caches.py +0 -255
  61. fsspec/tests/test_callbacks.py +0 -89
  62. fsspec/tests/test_compression.py +0 -164
  63. fsspec/tests/test_config.py +0 -129
  64. fsspec/tests/test_core.py +0 -466
  65. fsspec/tests/test_downstream.py +0 -40
  66. fsspec/tests/test_file.py +0 -200
  67. fsspec/tests/test_fuse.py +0 -147
  68. fsspec/tests/test_generic.py +0 -90
  69. fsspec/tests/test_gui.py +0 -23
  70. fsspec/tests/test_mapping.py +0 -228
  71. fsspec/tests/test_parquet.py +0 -140
  72. fsspec/tests/test_registry.py +0 -134
  73. fsspec/tests/test_spec.py +0 -1167
  74. fsspec/tests/test_utils.py +0 -478
  75. fsspec-2024.5.0.dist-info/RECORD +0 -111
  76. {fsspec-2024.5.0.dist-info → fsspec-2024.6.1.dist-info}/licenses/LICENSE +0 -0
fsspec/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '2024.5.0'
16
- __version_tuple__ = version_tuple = (2024, 5, 0)
15
+ __version__ = version = '2024.6.1'
16
+ __version_tuple__ = version_tuple = (2024, 6, 1)
fsspec/caching.py CHANGED
@@ -15,6 +15,7 @@ from typing import (
15
15
  ClassVar,
16
16
  Generic,
17
17
  NamedTuple,
18
+ Optional,
18
19
  OrderedDict,
19
20
  TypeVar,
20
21
  )
@@ -574,7 +575,7 @@ class KnownPartsOfAFile(BaseCache):
574
575
  blocksize: int,
575
576
  fetcher: Fetcher,
576
577
  size: int,
577
- data: dict[tuple[int, int], bytes] = {},
578
+ data: Optional[dict[tuple[int, int], bytes]] = None,
578
579
  strict: bool = True,
579
580
  **_: Any,
580
581
  ):
@@ -597,7 +598,7 @@ class KnownPartsOfAFile(BaseCache):
597
598
 
598
599
  self.data = dict(zip(offsets, blocks))
599
600
  else:
600
- self.data = data
601
+ self.data = {}
601
602
 
602
603
  def _fetch(self, start: int | None, stop: int | None) -> bytes:
603
604
  if start is None:
fsspec/compression.py CHANGED
@@ -139,7 +139,7 @@ class SnappyFile(AbstractBufferedFile):
139
139
  try:
140
140
  import snappy
141
141
 
142
- snappy.compress
142
+ snappy.compress(b"")
143
143
  # Snappy may use the .sz file extension, but this is not part of the
144
144
  # standard implementation.
145
145
  register_compression("snappy", SnappyFile, [])
fsspec/generic.py CHANGED
@@ -197,6 +197,7 @@ class GenericFileSystem(AsyncFileSystem):
197
197
  )
198
198
  result = {}
199
199
  for k, v in out.items():
200
+ v = v.copy() # don't corrupt target FS dircache
200
201
  name = fs.unstrip_protocol(k)
201
202
  v["name"] = name
202
203
  result[name] = v
@@ -210,6 +211,7 @@ class GenericFileSystem(AsyncFileSystem):
210
211
  out = await fs._info(url, **kwargs)
211
212
  else:
212
213
  out = fs.info(url, **kwargs)
214
+ out = out.copy() # don't edit originals
213
215
  out["name"] = fs.unstrip_protocol(out["name"])
214
216
  return out
215
217
 
@@ -224,6 +226,7 @@ class GenericFileSystem(AsyncFileSystem):
224
226
  out = await fs._ls(url, detail=True, **kwargs)
225
227
  else:
226
228
  out = fs.ls(url, detail=True, **kwargs)
229
+ out = [o.copy() for o in out] # don't edit originals
227
230
  for o in out:
228
231
  o["name"] = fs.unstrip_protocol(o["name"])
229
232
  if detail:
@@ -425,7 +425,6 @@ class CachingFileSystem(AbstractFileSystem):
425
425
  "clear_cache",
426
426
  "clear_expired_cache",
427
427
  "pop_from_cache",
428
- "_mkcache",
429
428
  "local_file",
430
429
  "_paths_from_path",
431
430
  "get_mapper",
@@ -435,12 +434,10 @@ class CachingFileSystem(AbstractFileSystem):
435
434
  "__hash__",
436
435
  "__eq__",
437
436
  "to_json",
437
+ "to_dict",
438
438
  "cache_size",
439
439
  "pipe_file",
440
440
  "pipe",
441
- "isdir",
442
- "isfile",
443
- "exists",
444
441
  "start_transaction",
445
442
  "end_transaction",
446
443
  }:
@@ -510,15 +507,6 @@ class CachingFileSystem(AbstractFileSystem):
510
507
  ^ hash(self.target_protocol)
511
508
  )
512
509
 
513
- def to_json(self):
514
- """Calculate JSON representation.
515
-
516
- Not implemented yet for CachingFileSystem.
517
- """
518
- raise NotImplementedError(
519
- "CachingFileSystem JSON representation not implemented"
520
- )
521
-
522
510
 
523
511
  class WholeFileCacheFileSystem(CachingFileSystem):
524
512
  """Caches whole remote files on first access
@@ -663,7 +651,8 @@ class WholeFileCacheFileSystem(CachingFileSystem):
663
651
  def _open(self, path, mode="rb", **kwargs):
664
652
  path = self._strip_protocol(path)
665
653
  if "r" not in mode:
666
- fn = self._make_local_details(path)
654
+ hash = self._mapper(path)
655
+ fn = os.path.join(self.storage[-1], hash)
667
656
  user_specified_kwargs = {
668
657
  k: v
669
658
  for k, v in kwargs.items()
@@ -806,7 +795,8 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
806
795
  if self._intrans:
807
796
  f = [_ for _ in self.transaction.files if _.path == path]
808
797
  if f:
809
- return {"name": path, "size": f[0].size or f[0].tell(), "type": "file"}
798
+ size = os.path.getsize(f[0].fn) if f[0].closed else f[0].tell()
799
+ return {"name": path, "size": size, "type": "file"}
810
800
  f = any(_.path.startswith(path + "/") for _ in self.transaction.files)
811
801
  if f:
812
802
  return {"name": path, "size": 0, "type": "directory"}
@@ -912,7 +902,7 @@ class LocalTempFile:
912
902
  self.close()
913
903
 
914
904
  def close(self):
915
- self.size = self.fh.tell()
905
+ # self.size = self.fh.tell()
916
906
  if self.closed:
917
907
  return
918
908
  self.fh.close()
@@ -56,6 +56,8 @@ class DirFileSystem(AsyncFileSystem):
56
56
  if not path:
57
57
  return self.path
58
58
  return self.fs.sep.join((self.path, self._strip_protocol(path)))
59
+ if isinstance(path, dict):
60
+ return {self._join(_path): value for _path, value in path.items()}
59
61
  return [self._join(_path) for _path in path]
60
62
 
61
63
  def _relpath(self, path):
@@ -1,5 +1,7 @@
1
1
  import requests
2
2
 
3
+ import fsspec
4
+
3
5
  from ..spec import AbstractFileSystem
4
6
  from ..utils import infer_storage_options
5
7
  from .memory import MemoryFile
@@ -225,3 +227,13 @@ class GithubFileSystem(AbstractFileSystem):
225
227
  raise FileNotFoundError(path)
226
228
  r.raise_for_status()
227
229
  return MemoryFile(None, None, r.content)
230
+
231
+ def cat(self, path, recursive=False, on_error="raise", **kwargs):
232
+ paths = self.expand_path(path, recursive=recursive)
233
+ urls = [
234
+ self.rurl.format(org=self.org, repo=self.repo, path=u, sha=self.root)
235
+ for u, sh in paths
236
+ ]
237
+ fs = fsspec.filesystem("http")
238
+ data = fs.cat(urls, on_error="return")
239
+ return {u: v for ((k, v), u) in zip(data.items(), urls)}
@@ -560,6 +560,7 @@ class HTTPFile(AbstractBufferedFile):
560
560
  if mode != "rb":
561
561
  raise NotImplementedError("File mode not supported")
562
562
  self.asynchronous = asynchronous
563
+ self.loop = loop
563
564
  self.url = url
564
565
  self.session = session
565
566
  self.details = {"name": url, "size": size, "type": "file"}
@@ -572,7 +573,6 @@ class HTTPFile(AbstractBufferedFile):
572
573
  cache_options=cache_options,
573
574
  **kwargs,
574
575
  )
575
- self.loop = loop
576
576
 
577
577
  def read(self, length=-1):
578
578
  """Read bytes from file
@@ -736,6 +736,7 @@ class HTTPStreamFile(AbstractBufferedFile):
736
736
  return r
737
737
 
738
738
  self.r = sync(self.loop, cor)
739
+ self.loop = fs.loop
739
740
 
740
741
  def seek(self, loc, whence=0):
741
742
  if loc == 0 and whence == 1:
@@ -935,6 +935,13 @@ class ReferenceFileSystem(AsyncFileSystem):
935
935
 
936
936
  def _process_references0(self, references):
937
937
  """Make reference dict for Spec Version 0"""
938
+ if isinstance(references, dict):
939
+ # do not do this for lazy/parquet backend, which will not make dicts,
940
+ # but must remain writable in the original object
941
+ references = {
942
+ key: json.dumps(val) if isinstance(val, dict) else val
943
+ for key, val in references.items()
944
+ }
938
945
  self.references = references
939
946
 
940
947
  def _process_references1(self, references, template_overrides=None):
@@ -952,6 +959,8 @@ class ReferenceFileSystem(AsyncFileSystem):
952
959
  if v.startswith("base64:"):
953
960
  self.references[k] = base64.b64decode(v[7:])
954
961
  self.references[k] = v
962
+ elif isinstance(v, dict):
963
+ self.references[k] = json.dumps(v)
955
964
  elif self.templates:
956
965
  u = v[0]
957
966
  if "{{" in u:
@@ -68,6 +68,7 @@ class SMBFileSystem(AbstractFileSystem):
68
68
  encrypt=None,
69
69
  share_access=None,
70
70
  register_session_retries=5,
71
+ auto_mkdir=False,
71
72
  **kwargs,
72
73
  ):
73
74
  """
@@ -102,6 +103,10 @@ class SMBFileSystem(AbstractFileSystem):
102
103
  - 'r': Allow other handles to be opened with read access.
103
104
  - 'w': Allow other handles to be opened with write access.
104
105
  - 'd': Allow other handles to be opened with delete access.
106
+ auto_mkdir: bool
107
+ Whether, when opening a file, the directory containing it should
108
+ be created (if it doesn't already exist). This is assumed by pyarrow
109
+ and zarr-python code.
105
110
  """
106
111
  super().__init__(**kwargs)
107
112
  self.host = host
@@ -113,6 +118,7 @@ class SMBFileSystem(AbstractFileSystem):
113
118
  self.temppath = kwargs.pop("temppath", "")
114
119
  self.share_access = share_access
115
120
  self.register_session_retries = register_session_retries
121
+ self.auto_mkdir = auto_mkdir
116
122
  self._connect()
117
123
 
118
124
  @property
@@ -224,6 +230,8 @@ class SMBFileSystem(AbstractFileSystem):
224
230
  By specifying 'share_access' in 'kwargs' it is possible to override the
225
231
  default shared access setting applied in the constructor of this object.
226
232
  """
233
+ if self.auto_mkdir and "w" in mode:
234
+ self.makedirs(self._parent(path), exist_ok=True)
227
235
  bls = block_size if block_size is not None and block_size >= 0 else -1
228
236
  wpath = _as_unc_path(self.host, path)
229
237
  share_access = kwargs.pop("share_access", self.share_access)
@@ -245,6 +253,8 @@ class SMBFileSystem(AbstractFileSystem):
245
253
  """Copy within two locations in the same filesystem"""
246
254
  wpath1 = _as_unc_path(self.host, path1)
247
255
  wpath2 = _as_unc_path(self.host, path2)
256
+ if self.auto_mkdir:
257
+ self.makedirs(self._parent(path2), exist_ok=True)
248
258
  smbclient.copyfile(wpath1, wpath2, port=self._port, **kwargs)
249
259
 
250
260
  def _rm(self, path):
fsspec/json.py ADDED
@@ -0,0 +1,121 @@
1
+ import json
2
+ from contextlib import suppress
3
+ from pathlib import PurePath
4
+ from typing import (
5
+ Any,
6
+ Callable,
7
+ ClassVar,
8
+ Dict,
9
+ List,
10
+ Mapping,
11
+ Optional,
12
+ Sequence,
13
+ Tuple,
14
+ )
15
+
16
+ from .registry import _import_class, get_filesystem_class
17
+ from .spec import AbstractFileSystem
18
+
19
+
20
+ class FilesystemJSONEncoder(json.JSONEncoder):
21
+ include_password: ClassVar[bool] = True
22
+
23
+ def default(self, o: Any) -> Any:
24
+ if isinstance(o, AbstractFileSystem):
25
+ return o.to_dict(include_password=self.include_password)
26
+ if isinstance(o, PurePath):
27
+ cls = type(o)
28
+ return {"cls": f"{cls.__module__}.{cls.__name__}", "str": str(o)}
29
+
30
+ return super().default(o)
31
+
32
+ def make_serializable(self, obj: Any) -> Any:
33
+ """
34
+ Recursively converts an object so that it can be JSON serialized via
35
+ :func:`json.dumps` and :func:`json.dump`, without actually calling
36
+ said functions.
37
+ """
38
+ if isinstance(obj, (str, int, float, bool)):
39
+ return obj
40
+ if isinstance(obj, Mapping):
41
+ return {k: self.make_serializable(v) for k, v in obj.items()}
42
+ if isinstance(obj, Sequence):
43
+ return [self.make_serializable(v) for v in obj]
44
+
45
+ return self.default(obj)
46
+
47
+
48
+ class FilesystemJSONDecoder(json.JSONDecoder):
49
+ def __init__(
50
+ self,
51
+ *,
52
+ object_hook: Optional[Callable[[Dict[str, Any]], Any]] = None,
53
+ parse_float: Optional[Callable[[str], Any]] = None,
54
+ parse_int: Optional[Callable[[str], Any]] = None,
55
+ parse_constant: Optional[Callable[[str], Any]] = None,
56
+ strict: bool = True,
57
+ object_pairs_hook: Optional[Callable[[List[Tuple[str, Any]]], Any]] = None,
58
+ ) -> None:
59
+ self.original_object_hook = object_hook
60
+
61
+ super().__init__(
62
+ object_hook=self.custom_object_hook,
63
+ parse_float=parse_float,
64
+ parse_int=parse_int,
65
+ parse_constant=parse_constant,
66
+ strict=strict,
67
+ object_pairs_hook=object_pairs_hook,
68
+ )
69
+
70
+ @classmethod
71
+ def try_resolve_path_cls(cls, dct: Dict[str, Any]):
72
+ with suppress(Exception):
73
+ fqp = dct["cls"]
74
+
75
+ path_cls = _import_class(fqp)
76
+
77
+ if issubclass(path_cls, PurePath):
78
+ return path_cls
79
+
80
+ return None
81
+
82
+ @classmethod
83
+ def try_resolve_fs_cls(cls, dct: Dict[str, Any]):
84
+ with suppress(Exception):
85
+ if "cls" in dct:
86
+ try:
87
+ fs_cls = _import_class(dct["cls"])
88
+ if issubclass(fs_cls, AbstractFileSystem):
89
+ return fs_cls
90
+ except Exception:
91
+ if "protocol" in dct: # Fallback if cls cannot be imported
92
+ return get_filesystem_class(dct["protocol"])
93
+
94
+ raise
95
+
96
+ return None
97
+
98
+ def custom_object_hook(self, dct: Dict[str, Any]):
99
+ if "cls" in dct:
100
+ if (obj_cls := self.try_resolve_fs_cls(dct)) is not None:
101
+ return AbstractFileSystem.from_dict(dct)
102
+ if (obj_cls := self.try_resolve_path_cls(dct)) is not None:
103
+ return obj_cls(dct["str"])
104
+
105
+ if self.original_object_hook is not None:
106
+ return self.original_object_hook(dct)
107
+
108
+ return dct
109
+
110
+ def unmake_serializable(self, obj: Any) -> Any:
111
+ """
112
+ Inverse function of :meth:`FilesystemJSONEncoder.make_serializable`.
113
+ """
114
+ if isinstance(obj, dict):
115
+ obj = self.custom_object_hook(obj)
116
+ if isinstance(obj, dict):
117
+ return {k: self.unmake_serializable(v) for k, v in obj.items()}
118
+ if isinstance(obj, (list, tuple)):
119
+ return [self.unmake_serializable(v) for v in obj]
120
+
121
+ return obj
fsspec/registry.py CHANGED
@@ -257,27 +257,33 @@ update the current installation.
257
257
  """
258
258
 
259
259
 
260
- def _import_class(cls, minv=None):
261
- """Take a string FQP and return the imported class or identifier
260
+ def _import_class(fqp: str):
261
+ """Take a fully-qualified path and return the imported class or identifier.
262
262
 
263
- cls is of the form "package.module.klass" or "package.module:subobject.klass"
263
+ ``fqp`` is of the form "package.module.klass" or
264
+ "package.module:subobject.klass".
265
+
266
+ Warnings
267
+ --------
268
+ This can import arbitrary modules. Make sure you haven't installed any modules
269
+ that may execute malicious code at import time.
264
270
  """
265
- if ":" in cls:
266
- mod, name = cls.rsplit(":", 1)
267
- s3 = mod == "s3fs"
268
- mod = importlib.import_module(mod)
269
- if s3 and mod.__version__.split(".") < ["0", "5"]:
270
- warnings.warn(s3_msg)
271
- for part in name.split("."):
272
- mod = getattr(mod, part)
273
- return mod
271
+ if ":" in fqp:
272
+ mod, name = fqp.rsplit(":", 1)
274
273
  else:
275
- mod, name = cls.rsplit(".", 1)
276
- s3 = mod == "s3fs"
277
- mod = importlib.import_module(mod)
278
- if s3 and mod.__version__.split(".") < ["0", "5"]:
279
- warnings.warn(s3_msg)
280
- return getattr(mod, name)
274
+ mod, name = fqp.rsplit(".", 1)
275
+
276
+ is_s3 = mod == "s3fs"
277
+ mod = importlib.import_module(mod)
278
+ if is_s3 and mod.__version__.split(".") < ["0", "5"]:
279
+ warnings.warn(s3_msg)
280
+ for part in name.split("."):
281
+ mod = getattr(mod, part)
282
+
283
+ if not isinstance(mod, type):
284
+ raise TypeError(f"{fqp} is not a class")
285
+
286
+ return mod
281
287
 
282
288
 
283
289
  def filesystem(protocol, **storage_options):
fsspec/spec.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import io
4
+ import json
4
5
  import logging
5
6
  import os
6
7
  import threading
@@ -9,7 +10,7 @@ import weakref
9
10
  from errno import ESPIPE
10
11
  from glob import has_magic
11
12
  from hashlib import sha256
12
- from typing import ClassVar
13
+ from typing import Any, ClassVar, Dict, Tuple
13
14
 
14
15
  from .callbacks import DEFAULT_CALLBACK
15
16
  from .config import apply_config, conf
@@ -115,6 +116,10 @@ class AbstractFileSystem(metaclass=_Cached):
115
116
  #: Extra *class attributes* that should be considered when hashing.
116
117
  _extra_tokenize_attributes = ()
117
118
 
119
+ # Set by _Cached metaclass
120
+ storage_args: Tuple[Any, ...]
121
+ storage_options: Dict[str, Any]
122
+
118
123
  def __init__(self, *args, **storage_options):
119
124
  """Create and configure file-system instance
120
125
 
@@ -1381,41 +1386,45 @@ class AbstractFileSystem(metaclass=_Cached):
1381
1386
  length = size - offset
1382
1387
  return read_block(f, offset, length, delimiter)
1383
1388
 
1384
- def to_json(self):
1389
+ def to_json(self, *, include_password: bool = True) -> str:
1385
1390
  """
1386
- JSON representation of this filesystem instance
1391
+ JSON representation of this filesystem instance.
1392
+
1393
+ Parameters
1394
+ ----------
1395
+ include_password: bool, default True
1396
+ Whether to include the password (if any) in the output.
1387
1397
 
1388
1398
  Returns
1389
1399
  -------
1390
- str: JSON structure with keys cls (the python location of this class),
1391
- protocol (text name of this class's protocol, first one in case of
1392
- multiple), args (positional args, usually empty), and all other
1393
- kwargs as their own keys.
1400
+ JSON string with keys ``cls`` (the python location of this class),
1401
+ protocol (text name of this class's protocol, first one in case of
1402
+ multiple), ``args`` (positional args, usually empty), and all other
1403
+ keyword arguments as their own keys.
1404
+
1405
+ Warnings
1406
+ --------
1407
+ Serialized filesystems may contain sensitive information which have been
1408
+ passed to the constructor, such as passwords and tokens. Make sure you
1409
+ store and send them in a secure environment!
1394
1410
  """
1395
- import json
1411
+ from .json import FilesystemJSONEncoder
1396
1412
 
1397
- cls = type(self)
1398
- cls = ".".join((cls.__module__, cls.__name__))
1399
- proto = (
1400
- self.protocol[0]
1401
- if isinstance(self.protocol, (tuple, list))
1402
- else self.protocol
1403
- )
1404
1413
  return json.dumps(
1405
- dict(
1406
- cls=cls,
1407
- protocol=proto,
1408
- args=self.storage_args,
1409
- **self.storage_options,
1410
- )
1414
+ self,
1415
+ cls=type(
1416
+ "_FilesystemJSONEncoder",
1417
+ (FilesystemJSONEncoder,),
1418
+ {"include_password": include_password},
1419
+ ),
1411
1420
  )
1412
1421
 
1413
1422
  @staticmethod
1414
- def from_json(blob):
1423
+ def from_json(blob: str) -> AbstractFileSystem:
1415
1424
  """
1416
- Recreate a filesystem instance from JSON representation
1425
+ Recreate a filesystem instance from JSON representation.
1417
1426
 
1418
- See ``.to_json()`` for the expected structure of the input
1427
+ See ``.to_json()`` for the expected structure of the input.
1419
1428
 
1420
1429
  Parameters
1421
1430
  ----------
@@ -1424,18 +1433,95 @@ class AbstractFileSystem(metaclass=_Cached):
1424
1433
  Returns
1425
1434
  -------
1426
1435
  file system instance, not necessarily of this particular class.
1436
+
1437
+ Warnings
1438
+ --------
1439
+ This can import arbitrary modules (as determined by the ``cls`` key).
1440
+ Make sure you haven't installed any modules that may execute malicious code
1441
+ at import time.
1427
1442
  """
1428
- import json
1443
+ from .json import FilesystemJSONDecoder
1429
1444
 
1430
- from .registry import _import_class, get_filesystem_class
1445
+ return json.loads(blob, cls=FilesystemJSONDecoder)
1431
1446
 
1432
- dic = json.loads(blob)
1433
- protocol = dic.pop("protocol")
1434
- try:
1435
- cls = _import_class(dic.pop("cls"))
1436
- except (ImportError, ValueError, RuntimeError, KeyError):
1437
- cls = get_filesystem_class(protocol)
1438
- return cls(*dic.pop("args", ()), **dic)
1447
+ def to_dict(self, *, include_password: bool = True) -> Dict[str, Any]:
1448
+ """
1449
+ JSON-serializable dictionary representation of this filesystem instance.
1450
+
1451
+ Parameters
1452
+ ----------
1453
+ include_password: bool, default True
1454
+ Whether to include the password (if any) in the output.
1455
+
1456
+ Returns
1457
+ -------
1458
+ Dictionary with keys ``cls`` (the python location of this class),
1459
+ protocol (text name of this class's protocol, first one in case of
1460
+ multiple), ``args`` (positional args, usually empty), and all other
1461
+ keyword arguments as their own keys.
1462
+
1463
+ Warnings
1464
+ --------
1465
+ Serialized filesystems may contain sensitive information which have been
1466
+ passed to the constructor, such as passwords and tokens. Make sure you
1467
+ store and send them in a secure environment!
1468
+ """
1469
+ from .json import FilesystemJSONEncoder
1470
+
1471
+ json_encoder = FilesystemJSONEncoder()
1472
+
1473
+ cls = type(self)
1474
+ proto = self.protocol
1475
+
1476
+ storage_options = dict(self.storage_options)
1477
+ if not include_password:
1478
+ storage_options.pop("password", None)
1479
+
1480
+ return dict(
1481
+ cls=f"{cls.__module__}:{cls.__name__}",
1482
+ protocol=proto[0] if isinstance(proto, (tuple, list)) else proto,
1483
+ args=json_encoder.make_serializable(self.storage_args),
1484
+ **json_encoder.make_serializable(storage_options),
1485
+ )
1486
+
1487
+ @staticmethod
1488
+ def from_dict(dct: Dict[str, Any]) -> AbstractFileSystem:
1489
+ """
1490
+ Recreate a filesystem instance from dictionary representation.
1491
+
1492
+ See ``.to_dict()`` for the expected structure of the input.
1493
+
1494
+ Parameters
1495
+ ----------
1496
+ dct: Dict[str, Any]
1497
+
1498
+ Returns
1499
+ -------
1500
+ file system instance, not necessarily of this particular class.
1501
+
1502
+ Warnings
1503
+ --------
1504
+ This can import arbitrary modules (as determined by the ``cls`` key).
1505
+ Make sure you haven't installed any modules that may execute malicious code
1506
+ at import time.
1507
+ """
1508
+ from .json import FilesystemJSONDecoder
1509
+
1510
+ json_decoder = FilesystemJSONDecoder()
1511
+
1512
+ dct = dict(dct) # Defensive copy
1513
+
1514
+ cls = FilesystemJSONDecoder.try_resolve_fs_cls(dct)
1515
+ if cls is None:
1516
+ raise ValueError("Not a serialized AbstractFileSystem")
1517
+
1518
+ dct.pop("cls", None)
1519
+ dct.pop("protocol", None)
1520
+
1521
+ return cls(
1522
+ *json_decoder.unmake_serializable(dct.pop("args", ())),
1523
+ **json_decoder.unmake_serializable(dct),
1524
+ )
1439
1525
 
1440
1526
  def _get_pyarrow_filesystem(self):
1441
1527
  """
fsspec/utils.py CHANGED
@@ -435,7 +435,7 @@ def isfilelike(f: Any) -> TypeGuard[IO[bytes]]:
435
435
 
436
436
  def get_protocol(url: str) -> str:
437
437
  url = stringify_path(url)
438
- parts = re.split(r"(\:\:|\://)", url, 1)
438
+ parts = re.split(r"(\:\:|\://)", url, maxsplit=1)
439
439
  if len(parts) > 1:
440
440
  return parts[0]
441
441
  return "file"