fsspec 2023.9.2__py3-none-any.whl → 2023.12.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -40,7 +40,7 @@ class JupyterFileSystem(fsspec.AbstractFileSystem):
40
40
 
41
41
  def ls(self, path, detail=True, **kwargs):
42
42
  path = self._strip_protocol(path)
43
- r = self.session.get(self.url + "/" + path)
43
+ r = self.session.get(f"{self.url}/{path}")
44
44
  if r.status_code == 404:
45
45
  return FileNotFoundError(path)
46
46
  r.raise_for_status()
@@ -61,7 +61,7 @@ class JupyterFileSystem(fsspec.AbstractFileSystem):
61
61
 
62
62
  def cat_file(self, path, start=None, end=None, **kwargs):
63
63
  path = self._strip_protocol(path)
64
- r = self.session.get(self.url + "/" + path)
64
+ r = self.session.get(f"{self.url}/{path}")
65
65
  if r.status_code == 404:
66
66
  return FileNotFoundError(path)
67
67
  r.raise_for_status()
@@ -83,7 +83,7 @@ class JupyterFileSystem(fsspec.AbstractFileSystem):
83
83
  "format": "base64",
84
84
  "type": "file",
85
85
  }
86
- self.session.put(self.url + "/" + path, json=json)
86
+ self.session.put(f"{self.url}/{path}", json=json)
87
87
 
88
88
  def mkdir(self, path, create_parents=True, **kwargs):
89
89
  path = self._strip_protocol(path)
@@ -96,11 +96,11 @@ class JupyterFileSystem(fsspec.AbstractFileSystem):
96
96
  "content": None,
97
97
  "type": "directory",
98
98
  }
99
- self.session.put(self.url + "/" + path, json=json)
99
+ self.session.put(f"{self.url}/{path}", json=json)
100
100
 
101
101
  def _rm(self, path):
102
102
  path = self._strip_protocol(path)
103
- self.session.delete(self.url + "/" + path)
103
+ self.session.delete(f"{self.url}/{path}")
104
104
 
105
105
  def _open(self, path, mode="rb", **kwargs):
106
106
  path = self._strip_protocol(path)
@@ -122,8 +122,7 @@ class LibArchiveFileSystem(AbstractArchiveFileSystem):
122
122
  files = open_files(fo, protocol=target_protocol, **(target_options or {}))
123
123
  if len(files) != 1:
124
124
  raise ValueError(
125
- 'Path "{}" did not resolve to exactly'
126
- 'one file: "{}"'.format(fo, files)
125
+ f'Path "{fo}" did not resolve to exactly one file: "{files}"'
127
126
  )
128
127
  fo = files[0]
129
128
  self.of = fo
@@ -29,7 +29,7 @@ class LocalFileSystem(AbstractFileSystem):
29
29
  """
30
30
 
31
31
  root_marker = "/"
32
- protocol = "file"
32
+ protocol = "file", "local"
33
33
  local_file = True
34
34
 
35
35
  def __init__(self, auto_mkdir=False, **kwargs):
@@ -98,7 +98,7 @@ class LocalFileSystem(AbstractFileSystem):
98
98
  "islink": link,
99
99
  }
100
100
  for field in ["mode", "uid", "gid", "mtime", "ino", "nlink"]:
101
- result[field] = getattr(out, "st_" + field)
101
+ result[field] = getattr(out, f"st_{field}")
102
102
  if result["islink"]:
103
103
  result["destination"] = os.readlink(path)
104
104
  try:
@@ -215,6 +215,10 @@ class LocalFileSystem(AbstractFileSystem):
215
215
  path = path[7:]
216
216
  elif path.startswith("file:"):
217
217
  path = path[5:]
218
+ elif path.startswith("local://"):
219
+ path = path[8:]
220
+ elif path.startswith("local:"):
221
+ path = path[6:]
218
222
  return make_path_posix(path).rstrip("/") or cls.root_marker
219
223
 
220
224
  def _isfilestore(self):
@@ -240,7 +244,7 @@ def make_path_posix(path, sep=os.sep):
240
244
  return path
241
245
  if path.startswith("./"):
242
246
  path = path[2:]
243
- return os.getcwd() + "/" + path
247
+ return f"{os.getcwd()}/{path}"
244
248
  if (
245
249
  (sep not in path and "/" not in path)
246
250
  or (sep == "/" and not path.startswith("/"))
@@ -251,7 +255,7 @@ def make_path_posix(path, sep=os.sep):
251
255
  # abspath made some more '\\' separators
252
256
  return make_path_posix(osp.abspath(path))
253
257
  else:
254
- return os.getcwd() + "/" + path
258
+ return f"{os.getcwd()}/{path}"
255
259
  if path.startswith("file://"):
256
260
  path = path[7:]
257
261
  if re.match("/[A-Za-z]:", path):
@@ -175,7 +175,7 @@ class MemoryFileSystem(AbstractFileSystem):
175
175
  parent = self._parent(parent)
176
176
  if self.isfile(parent):
177
177
  raise FileExistsError(parent)
178
- if mode in ["rb", "ab", "rb+"]:
178
+ if mode in ["rb", "ab", "r+b"]:
179
179
  if path in self.store:
180
180
  f = self.store[path]
181
181
  if mode == "ab":
@@ -82,8 +82,12 @@ def ravel_multi_index(idx, sizes):
82
82
 
83
83
 
84
84
  class LazyReferenceMapper(collections.abc.MutableMapping):
85
- """Interface to read parquet store as if it were a standard kerchunk
86
- references dict."""
85
+ """This interface can be used to read/write references from Parquet stores.
86
+ It is not intended for other types of references.
87
+ It can be used with Kerchunk's MultiZarrToZarr method to combine
88
+ references into a parquet store.
89
+ Examples of this use-case can be found here:
90
+ https://fsspec.github.io/kerchunk/advanced.html?highlight=parquet#parquet-storage"""
87
91
 
88
92
  # import is class level to prevent numpy dep requirement for fsspec
89
93
  @property
@@ -108,17 +112,24 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
108
112
  Root of parquet store
109
113
  fs : fsspec.AbstractFileSystem
110
114
  fsspec filesystem object, default is local filesystem.
111
- cache_size : int
115
+ cache_size : int, default=128
112
116
  Maximum size of LRU cache, where cache_size*record_size denotes
113
117
  the total number of references that can be loaded in memory at once.
118
+ categorical_threshold : int
119
+ Encode urls as pandas.Categorical to reduce memory footprint if the ratio
120
+ of the number of unique urls to total number of refs for each variable
121
+ is greater than or equal to this number. (default 10)
122
+
123
+
114
124
  """
115
125
  self.root = root
116
126
  self.chunk_sizes = {}
117
127
  self._items = {}
118
128
  self.dirs = None
119
129
  self.fs = fsspec.filesystem("file") if fs is None else fs
120
- with self.fs.open("/".join([self.root, ".zmetadata"]), "rb") as f:
121
- self._items[".zmetadata"] = f.read()
130
+ self._items[".zmetadata"] = self.fs.cat_file(
131
+ "/".join([self.root, ".zmetadata"])
132
+ )
122
133
  met = json.loads(self._items[".zmetadata"])
123
134
  self.record_size = met["record_size"]
124
135
  self.zmetadata = met["metadata"]
@@ -131,18 +142,37 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
131
142
  def open_refs(field, record):
132
143
  """cached parquet file loader"""
133
144
  path = self.url.format(field=field, record=record)
134
- with self.fs.open(path) as f:
135
- # TODO: since all we do is iterate, is arrow without pandas
136
- # better here?
137
- df = self.pd.read_parquet(f, engine="fastparquet")
145
+ data = io.BytesIO(self.fs.cat_file(path))
146
+ df = self.pd.read_parquet(data, engine="fastparquet")
138
147
  refs = {c: df[c].values for c in df.columns}
139
148
  return refs
140
149
 
141
150
  self.open_refs = open_refs
142
151
 
143
152
  @staticmethod
144
- def create(record_size, root, fs, **kwargs):
153
+ def create(root, storage_options=None, fs=None, record_size=10000, **kwargs):
154
+ """Make empty parquet reference set
155
+
156
+ Parameters
157
+ ----------
158
+ root: str
159
+ Directory to contain the output; will be created
160
+ storage_options: dict | None
161
+ For making the filesystem to use for writing is fs is None
162
+ fs: FileSystem | None
163
+ Filesystem for writing
164
+ record_size: int
165
+ Number of references per parquet file
166
+ kwargs: passed to __init__
167
+
168
+ Returns
169
+ -------
170
+ LazyReferenceMapper instance
171
+ """
145
172
  met = {"metadata": {}, "record_size": record_size}
173
+ if fs is None:
174
+ fs, root = fsspec.core.url_to_fs(root, **(storage_options or {}))
175
+ fs.makedirs(root, exist_ok=True)
146
176
  fs.pipe("/".join([root, ".zmetadata"]), json.dumps(met).encode())
147
177
  return LazyReferenceMapper(root, fs, **kwargs)
148
178
 
@@ -283,7 +313,7 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
283
313
  def _generate_record(self, field, record):
284
314
  """The references for a given parquet file of a given field"""
285
315
  refs = self.open_refs(field, record)
286
- it = iter(zip(refs.values()))
316
+ it = iter(zip(*refs.values()))
287
317
  if len(refs) == 3:
288
318
  # All urls
289
319
  return (list(t) for t in it)
@@ -594,7 +624,7 @@ class ReferenceFileSystem(AsyncFileSystem):
594
624
  **(ref_storage_args or target_options or {}), protocol=target_protocol
595
625
  )
596
626
  ref_fs, fo2 = fsspec.core.url_to_fs(fo, **dic)
597
- if ref_fs.isfile(fo):
627
+ if ref_fs.isfile(fo2):
598
628
  # text JSON
599
629
  with fsspec.open(fo, "rb", **dic) as f:
600
630
  logger.info("Read reference from URL %s", fo)
@@ -641,6 +671,7 @@ class ReferenceFileSystem(AsyncFileSystem):
641
671
  self.fss[protocol] = fs
642
672
  if remote_protocol is None:
643
673
  # get single protocol from references
674
+ # TODO: warning here, since this can be very expensive?
644
675
  for ref in self.references.values():
645
676
  if callable(ref):
646
677
  ref = ref()
@@ -763,24 +794,27 @@ class ReferenceFileSystem(AsyncFileSystem):
763
794
  raise NotImplementedError
764
795
  if isinstance(path, list) and (recursive or any("*" in p for p in path)):
765
796
  raise NotImplementedError
797
+ # TODO: if references is lazy, pre-fetch all paths in batch before access
766
798
  proto_dict = _protocol_groups(path, self.references)
767
799
  out = {}
768
800
  for proto, paths in proto_dict.items():
769
801
  fs = self.fss[proto]
770
- urls, starts, ends = [], [], []
802
+ urls, starts, ends, valid_paths = [], [], [], []
771
803
  for p in paths:
772
804
  # find references or label not-found. Early exit if any not
773
805
  # found and on_error is "raise"
774
806
  try:
775
807
  u, s, e = self._cat_common(p)
776
- urls.append(u)
777
- starts.append(s)
778
- ends.append(e)
779
808
  except FileNotFoundError as err:
780
809
  if on_error == "raise":
781
810
  raise
782
811
  if on_error != "omit":
783
812
  out[p] = err
813
+ else:
814
+ urls.append(u)
815
+ starts.append(s)
816
+ ends.append(e)
817
+ valid_paths.append(p)
784
818
 
785
819
  # process references into form for merging
786
820
  urls2 = []
@@ -788,7 +822,7 @@ class ReferenceFileSystem(AsyncFileSystem):
788
822
  ends2 = []
789
823
  paths2 = []
790
824
  whole_files = set()
791
- for u, s, e, p in zip(urls, starts, ends, paths):
825
+ for u, s, e, p in zip(urls, starts, ends, valid_paths):
792
826
  if isinstance(u, bytes):
793
827
  # data
794
828
  out[p] = u
@@ -800,7 +834,7 @@ class ReferenceFileSystem(AsyncFileSystem):
800
834
  starts2.append(s)
801
835
  ends2.append(e)
802
836
  paths2.append(p)
803
- for u, s, e, p in zip(urls, starts, ends, paths):
837
+ for u, s, e, p in zip(urls, starts, ends, valid_paths):
804
838
  # second run to account for files that are to be loaded whole
805
839
  if s is not None and u not in whole_files:
806
840
  urls2.append(u)
@@ -820,7 +854,7 @@ class ReferenceFileSystem(AsyncFileSystem):
820
854
  bytes_out = fs.cat_ranges(new_paths, new_starts, new_ends)
821
855
 
822
856
  # unbundle from merged bytes - simple approach
823
- for u, s, e, p in zip(urls, starts, ends, paths):
857
+ for u, s, e, p in zip(urls, starts, ends, valid_paths):
824
858
  if p in out:
825
859
  continue # was bytes, already handled
826
860
  for np, ns, ne, b in zip(new_paths, new_starts, new_ends, bytes_out):
@@ -954,16 +988,24 @@ class ReferenceFileSystem(AsyncFileSystem):
954
988
  elif len(part) == 1:
955
989
  size = None
956
990
  else:
957
- _, start, size = part
991
+ _, _, size = part
958
992
  par = path.rsplit("/", 1)[0] if "/" in path else ""
959
993
  par0 = par
994
+ subdirs = [par0]
960
995
  while par0 and par0 not in self.dircache:
961
- # build parent directories
962
- self.dircache[par0] = []
963
- self.dircache.setdefault(
964
- par0.rsplit("/", 1)[0] if "/" in par0 else "", []
965
- ).append({"name": par0, "type": "directory", "size": 0})
996
+ # collect parent directories
966
997
  par0 = self._parent(par0)
998
+ subdirs.append(par0)
999
+
1000
+ subdirs = subdirs[::-1]
1001
+ for parent, child in zip(subdirs, subdirs[1:]):
1002
+ # register newly discovered directories
1003
+ assert child not in self.dircache
1004
+ assert parent in self.dircache
1005
+ self.dircache[parent].append(
1006
+ {"name": child, "type": "directory", "size": 0}
1007
+ )
1008
+ self.dircache[child] = []
967
1009
 
968
1010
  self.dircache[par].append({"name": path, "type": "file", "size": size})
969
1011
 
@@ -41,14 +41,14 @@ class SFTPFileSystem(AbstractFileSystem):
41
41
  """
42
42
  if self._cached:
43
43
  return
44
- super(SFTPFileSystem, self).__init__(**ssh_kwargs)
44
+ super().__init__(**ssh_kwargs)
45
45
  self.temppath = ssh_kwargs.pop("temppath", "/tmp") # remote temp directory
46
46
  self.host = host
47
47
  self.ssh_kwargs = ssh_kwargs
48
48
  self._connect()
49
49
 
50
50
  def _connect(self):
51
- logger.debug("Connecting to SFTP server %s" % self.host)
51
+ logger.debug("Connecting to SFTP server %s", self.host)
52
52
  self.client = paramiko.SSHClient()
53
53
  self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
54
54
  self.client.connect(self.host, **self.ssh_kwargs)
@@ -66,9 +66,9 @@ class SFTPFileSystem(AbstractFileSystem):
66
66
  return out
67
67
 
68
68
  def mkdir(self, path, create_parents=False, mode=511):
69
- logger.debug("Creating folder %s" % path)
69
+ logger.debug("Creating folder %s", path)
70
70
  if self.exists(path):
71
- raise FileExistsError("File exists: {}".format(path))
71
+ raise FileExistsError(f"File exists: {path}")
72
72
 
73
73
  if create_parents:
74
74
  self.makedirs(path)
@@ -77,18 +77,18 @@ class SFTPFileSystem(AbstractFileSystem):
77
77
 
78
78
  def makedirs(self, path, exist_ok=False, mode=511):
79
79
  if self.exists(path) and not exist_ok:
80
- raise FileExistsError("File exists: {}".format(path))
80
+ raise FileExistsError(f"File exists: {path}")
81
81
 
82
82
  parts = path.split("/")
83
83
  path = ""
84
84
 
85
85
  for part in parts:
86
- path += "/" + part
86
+ path += f"/{part}"
87
87
  if not self.exists(path):
88
88
  self.ftp.mkdir(path, mode)
89
89
 
90
90
  def rmdir(self, path):
91
- logger.debug("Removing folder %s" % path)
91
+ logger.debug("Removing folder %s", path)
92
92
  self.ftp.rmdir(path)
93
93
 
94
94
  def info(self, path):
@@ -122,7 +122,7 @@ class SFTPFileSystem(AbstractFileSystem):
122
122
  return out
123
123
 
124
124
  def ls(self, path, detail=False):
125
- logger.debug("Listing folder %s" % path)
125
+ logger.debug("Listing folder %s", path)
126
126
  stats = [self._decode_stat(stat, path) for stat in self.ftp.listdir_iter(path)]
127
127
  if detail:
128
128
  return stats
@@ -131,7 +131,7 @@ class SFTPFileSystem(AbstractFileSystem):
131
131
  return sorted(paths)
132
132
 
133
133
  def put(self, lpath, rpath, callback=None, **kwargs):
134
- logger.debug("Put file %s into %s" % (lpath, rpath))
134
+ logger.debug("Put file %s into %s", lpath, rpath)
135
135
  self.ftp.put(lpath, rpath)
136
136
 
137
137
  def get_file(self, rpath, lpath, **kwargs):
@@ -146,7 +146,7 @@ class SFTPFileSystem(AbstractFileSystem):
146
146
  If 0, no buffering, if 1, line buffering, if >1, buffer that many
147
147
  bytes, if None use default from paramiko.
148
148
  """
149
- logger.debug("Opening file %s" % path)
149
+ logger.debug("Opening file %s", path)
150
150
  if kwargs.get("autocommit", True) is False:
151
151
  # writes to temporary file, move on commit
152
152
  path2 = "/".join([self.temppath, str(uuid.uuid4())])
@@ -167,7 +167,7 @@ class SFTPFileSystem(AbstractFileSystem):
167
167
  self.ftp.remove(path)
168
168
 
169
169
  def mv(self, old, new):
170
- logger.debug("Renaming %s into %s" % (old, new))
170
+ logger.debug("Renaming %s into %s", old, new)
171
171
  self.ftp.posix_rename(old, new)
172
172
 
173
173
 
@@ -1,4 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
2
  This module contains SMBFileSystem class responsible for handling access to
4
3
  Windows Samba network shares by using package smbprotocol
@@ -103,7 +102,7 @@ class SMBFileSystem(AbstractFileSystem):
103
102
  - 'w': Allow other handles to be opened with write access.
104
103
  - 'd': Allow other handles to be opened with delete access.
105
104
  """
106
- super(SMBFileSystem, self).__init__(**kwargs)
105
+ super().__init__(**kwargs)
107
106
  self.host = host
108
107
  self.port = port
109
108
  self.username = username
@@ -248,7 +247,7 @@ class SMBFileSystem(AbstractFileSystem):
248
247
  else:
249
248
  smbclient.remove(wpath, port=self._port)
250
249
 
251
- def mv(self, path1, path2, **kwargs):
250
+ def mv(self, path1, path2, recursive=None, maxdepth=None, **kwargs):
252
251
  wpath1 = _as_unc_path(self.host, path1)
253
252
  wpath2 = _as_unc_path(self.host, path2)
254
253
  smbclient.rename(wpath1, wpath2, port=self._port, **kwargs)
@@ -256,13 +255,13 @@ class SMBFileSystem(AbstractFileSystem):
256
255
 
257
256
  def _as_unc_path(host, path):
258
257
  rpath = path.replace("/", "\\")
259
- unc = "\\\\{}{}".format(host, rpath)
258
+ unc = f"\\\\{host}{rpath}"
260
259
  return unc
261
260
 
262
261
 
263
262
  def _as_temp_path(host, path, temppath):
264
263
  share = path.split("/")[1]
265
- temp_file = "/{}{}/{}".format(share, temppath, uuid.uuid4())
264
+ temp_file = f"/{share}{temppath}/{uuid.uuid4()}"
266
265
  unc = _as_unc_path(host, temp_file)
267
266
  return unc
268
267
 
@@ -21,7 +21,7 @@ class WebHDFS(AbstractFileSystem):
21
21
  """
22
22
  Interface to HDFS over HTTP using the WebHDFS API. Supports also HttpFS gateways.
23
23
 
24
- Three auth mechanisms are supported:
24
+ Four auth mechanisms are supported:
25
25
 
26
26
  insecure: no auth is done, and the user is assumed to be whoever they
27
27
  say they are (parameter ``user``), or a predefined value such as
@@ -34,6 +34,8 @@ class WebHDFS(AbstractFileSystem):
34
34
  service. Indeed, this client can also generate such tokens when
35
35
  not insecure. Note that tokens expire, but can be renewed (by a
36
36
  previously specified user) and may allow for proxying.
37
+ basic-auth: used when both parameter ``user`` and parameter ``password``
38
+ are provided.
37
39
 
38
40
  """
39
41
 
@@ -47,6 +49,7 @@ class WebHDFS(AbstractFileSystem):
47
49
  kerberos=False,
48
50
  token=None,
49
51
  user=None,
52
+ password=None,
50
53
  proxy_to=None,
51
54
  kerb_kwargs=None,
52
55
  data_proxy=None,
@@ -68,6 +71,9 @@ class WebHDFS(AbstractFileSystem):
68
71
  given
69
72
  user: str or None
70
73
  If given, assert the user name to connect with
74
+ password: str or None
75
+ If given, assert the password to use for basic auth. If password
76
+ is provided, user must be provided also
71
77
  proxy_to: str or None
72
78
  If given, the user has the authority to proxy, and this value is
73
79
  the user in who's name actions are taken
@@ -89,9 +95,7 @@ class WebHDFS(AbstractFileSystem):
89
95
  if self._cached:
90
96
  return
91
97
  super().__init__(**kwargs)
92
- self.url = "{protocol}://{host}:{port}/webhdfs/v1".format(
93
- protocol="https" if use_https else "http", host=host, port=port
94
- )
98
+ self.url = f"{'https' if use_https else 'http'}://{host}:{port}/webhdfs/v1"
95
99
  self.kerb = kerberos
96
100
  self.kerb_kwargs = kerb_kwargs or {}
97
101
  self.pars = {}
@@ -104,8 +108,19 @@ class WebHDFS(AbstractFileSystem):
104
108
  " token"
105
109
  )
106
110
  self.pars["delegation"] = token
107
- if user is not None:
108
- self.pars["user.name"] = user
111
+ self.user = user
112
+ self.password = password
113
+
114
+ if password is not None:
115
+ if user is None:
116
+ raise ValueError(
117
+ "If passing a password, the user must also be"
118
+ "set in order to set up the basic-auth"
119
+ )
120
+ else:
121
+ if user is not None:
122
+ self.pars["user.name"] = user
123
+
109
124
  if proxy_to is not None:
110
125
  self.pars["doas"] = proxy_to
111
126
  if kerberos and user is not None:
@@ -115,7 +130,7 @@ class WebHDFS(AbstractFileSystem):
115
130
  )
116
131
  self._connect()
117
132
 
118
- self._fsid = "webhdfs_" + tokenize(host, port)
133
+ self._fsid = f"webhdfs_{tokenize(host, port)}"
119
134
 
120
135
  @property
121
136
  def fsid(self):
@@ -128,8 +143,13 @@ class WebHDFS(AbstractFileSystem):
128
143
 
129
144
  self.session.auth = HTTPKerberosAuth(**self.kerb_kwargs)
130
145
 
146
+ if self.user is not None and self.password is not None:
147
+ from requests.auth import HTTPBasicAuth
148
+
149
+ self.session.auth = HTTPBasicAuth(self.user, self.password)
150
+
131
151
  def _call(self, op, method="get", path=None, data=None, redirect=True, **kwargs):
132
- url = self.url + quote(path or "")
152
+ url = self._apply_proxy(self.url + quote(path or "", safe="/="))
133
153
  args = kwargs.copy()
134
154
  args.update(self.pars)
135
155
  args["op"] = op.upper()
@@ -113,10 +113,10 @@ class ZipFileSystem(AbstractArchiveFileSystem):
113
113
  path = self._strip_protocol(path)
114
114
  if "r" in mode and self.mode in set("wa"):
115
115
  if self.exists(path):
116
- raise IOError("ZipFS can only be open for reading or writing, not both")
116
+ raise OSError("ZipFS can only be open for reading or writing, not both")
117
117
  raise FileNotFoundError(path)
118
118
  if "r" in self.mode and "w" in mode:
119
- raise IOError("ZipFS can only be open for reading or writing, not both")
119
+ raise OSError("ZipFS can only be open for reading or writing, not both")
120
120
  out = self.zip.open(path, mode.strip("b"))
121
121
  if "r" in mode:
122
122
  info = self.info(path)
fsspec/mapping.py CHANGED
@@ -54,8 +54,8 @@ class FSMap(MutableMapping):
54
54
  if check:
55
55
  if not self.fs.exists(root):
56
56
  raise ValueError(
57
- "Path %s does not exist. Create "
58
- " with the ``create=True`` keyword" % root
57
+ f"Path {root} does not exist. Create "
58
+ f" with the ``create=True`` keyword"
59
59
  )
60
60
  self.fs.touch(root + "/a")
61
61
  self.fs.rm(root + "/a")
fsspec/registry.py CHANGED
@@ -38,29 +38,31 @@ def register_implementation(name, cls, clobber=False, errtxt=None):
38
38
  if name in known_implementations and clobber is False:
39
39
  if cls != known_implementations[name]["class"]:
40
40
  raise ValueError(
41
- "Name (%s) already in the known_implementations and clobber "
42
- "is False" % name
41
+ f"Name ({name}) already in the known_implementations and clobber "
42
+ f"is False"
43
43
  )
44
44
  else:
45
45
  known_implementations[name] = {
46
46
  "class": cls,
47
- "err": errtxt or "%s import failed for protocol %s" % (cls, name),
47
+ "err": errtxt or f"{cls} import failed for protocol {name}",
48
48
  }
49
49
 
50
50
  else:
51
51
  if name in registry and clobber is False:
52
52
  if _registry[name] is not cls:
53
53
  raise ValueError(
54
- "Name (%s) already in the registry and clobber is False" % name
54
+ f"Name ({name}) already in the registry and clobber is False"
55
55
  )
56
56
  else:
57
57
  _registry[name] = cls
58
58
 
59
59
 
60
- # protocols mapped to the class which implements them. This dict can
60
+ # protocols mapped to the class which implements them. This dict can be
61
61
  # updated with register_implementation
62
62
  known_implementations = {
63
+ "data": {"class": "fsspec.implementations.data.DataFileSystem"},
63
64
  "file": {"class": "fsspec.implementations.local.LocalFileSystem"},
65
+ "local": {"class": "fsspec.implementations.local.LocalFileSystem"},
64
66
  "memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"},
65
67
  "dropbox": {
66
68
  "class": "dropboxdrivefs.DropboxDriveFileSystem",
@@ -228,7 +230,7 @@ def get_filesystem_class(protocol):
228
230
 
229
231
  if protocol not in registry:
230
232
  if protocol not in known_implementations:
231
- raise ValueError("Protocol not known: %s" % protocol)
233
+ raise ValueError(f"Protocol not known: {protocol}")
232
234
  bit = known_implementations[protocol]
233
235
  try:
234
236
  register_implementation(protocol, _import_class(bit["class"]))