fsspec 2024.6.1__py3-none-any.whl → 2024.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fsspec/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '2024.6.1'
16
- __version_tuple__ = version_tuple = (2024, 6, 1)
15
+ __version__ = version = '2024.9.0'
16
+ __version_tuple__ = version_tuple = (2024, 9, 0)
fsspec/asyn.py CHANGED
@@ -1072,7 +1072,7 @@ class AbstractAsyncStreamedFile(AbstractBufferedFile):
1072
1072
  self.offset = 0
1073
1073
  try:
1074
1074
  await self._initiate_upload()
1075
- except: # noqa: E722
1075
+ except:
1076
1076
  self.closed = True
1077
1077
  raise
1078
1078
 
fsspec/core.py CHANGED
@@ -639,7 +639,7 @@ def get_fs_token_paths(
639
639
  if isinstance(urlpath, (list, tuple, set)):
640
640
  if not urlpath:
641
641
  raise ValueError("empty urlpath sequence")
642
- urlpath0 = stringify_path(list(urlpath)[0])
642
+ urlpath0 = stringify_path(next(iter(urlpath)))
643
643
  else:
644
644
  urlpath0 = stringify_path(urlpath)
645
645
  storage_options = storage_options or {}
fsspec/fuse.py CHANGED
@@ -31,8 +31,8 @@ class FUSEr(Operations):
31
31
  path = "".join([self.root, path.lstrip("/")]).rstrip("/")
32
32
  try:
33
33
  info = self.fs.info(path)
34
- except FileNotFoundError:
35
- raise FuseOSError(ENOENT)
34
+ except FileNotFoundError as exc:
35
+ raise FuseOSError(ENOENT) from exc
36
36
 
37
37
  data = {"st_uid": info.get("uid", 1000), "st_gid": info.get("gid", 1000)}
38
38
  perm = info.get("mode", 0o777)
@@ -119,8 +119,8 @@ class FUSEr(Operations):
119
119
  fn = "".join([self.root, path.lstrip("/")])
120
120
  try:
121
121
  self.fs.rm(fn, False)
122
- except (OSError, FileNotFoundError):
123
- raise FuseOSError(EIO)
122
+ except (OSError, FileNotFoundError) as exc:
123
+ raise FuseOSError(EIO) from exc
124
124
 
125
125
  def release(self, path, fh):
126
126
  try:
fsspec/gui.py CHANGED
@@ -93,8 +93,10 @@ class SigSlot:
93
93
  """Display in a notebook or a server"""
94
94
  try:
95
95
  return self.panel._repr_mimebundle_(*args, **kwargs)
96
- except (ValueError, AttributeError):
97
- raise NotImplementedError("Panel does not seem to be set up properly")
96
+ except (ValueError, AttributeError) as exc:
97
+ raise NotImplementedError(
98
+ "Panel does not seem to be set up properly"
99
+ ) from exc
98
100
 
99
101
  def connect(self, signal, slot):
100
102
  """Associate call back with given event
@@ -128,7 +128,7 @@ class ArrowFSWrapper(AbstractFileSystem):
128
128
  with self.open(tmp_fname, "wb") as rstream:
129
129
  shutil.copyfileobj(lstream, rstream)
130
130
  self.fs.move(tmp_fname, path2)
131
- except BaseException: # noqa
131
+ except BaseException:
132
132
  with suppress(FileNotFoundError):
133
133
  self.fs.delete_file(tmp_fname)
134
134
  raise
@@ -77,9 +77,9 @@ class DatabricksFileSystem(AbstractFileSystem):
77
77
  )
78
78
  except DatabricksException as e:
79
79
  if e.error_code == "RESOURCE_DOES_NOT_EXIST":
80
- raise FileNotFoundError(e.message)
80
+ raise FileNotFoundError(e.message) from e
81
81
 
82
- raise e
82
+ raise
83
83
  files = r["files"]
84
84
  out = [
85
85
  {
@@ -123,9 +123,9 @@ class DatabricksFileSystem(AbstractFileSystem):
123
123
  self._send_to_api(method="post", endpoint="mkdirs", json={"path": path})
124
124
  except DatabricksException as e:
125
125
  if e.error_code == "RESOURCE_ALREADY_EXISTS":
126
- raise FileExistsError(e.message)
126
+ raise FileExistsError(e.message) from e
127
127
 
128
- raise e
128
+ raise
129
129
  self.invalidate_cache(self._parent(path))
130
130
 
131
131
  def mkdir(self, path, create_parents=True, **kwargs):
@@ -169,9 +169,9 @@ class DatabricksFileSystem(AbstractFileSystem):
169
169
  self.rm(path=path, recursive=recursive)
170
170
  elif e.error_code == "IO_ERROR":
171
171
  # Using the same exception as the os module would use here
172
- raise OSError(e.message)
172
+ raise OSError(e.message) from e
173
173
 
174
- raise e
174
+ raise
175
175
  self.invalidate_cache(self._parent(path))
176
176
 
177
177
  def mv(
@@ -212,11 +212,11 @@ class DatabricksFileSystem(AbstractFileSystem):
212
212
  )
213
213
  except DatabricksException as e:
214
214
  if e.error_code == "RESOURCE_DOES_NOT_EXIST":
215
- raise FileNotFoundError(e.message)
215
+ raise FileNotFoundError(e.message) from e
216
216
  elif e.error_code == "RESOURCE_ALREADY_EXISTS":
217
- raise FileExistsError(e.message)
217
+ raise FileExistsError(e.message) from e
218
218
 
219
- raise e
219
+ raise
220
220
  self.invalidate_cache(self._parent(source_path))
221
221
  self.invalidate_cache(self._parent(destination_path))
222
222
 
@@ -264,9 +264,9 @@ class DatabricksFileSystem(AbstractFileSystem):
264
264
  try:
265
265
  exception_json = e.response.json()
266
266
  except Exception:
267
- raise e
267
+ raise e from None
268
268
 
269
- raise DatabricksException(**exception_json)
269
+ raise DatabricksException(**exception_json) from e
270
270
 
271
271
  return r.json()
272
272
 
@@ -297,9 +297,9 @@ class DatabricksFileSystem(AbstractFileSystem):
297
297
  return r["handle"]
298
298
  except DatabricksException as e:
299
299
  if e.error_code == "RESOURCE_ALREADY_EXISTS":
300
- raise FileExistsError(e.message)
300
+ raise FileExistsError(e.message) from e
301
301
 
302
- raise e
302
+ raise
303
303
 
304
304
  def _close_handle(self, handle):
305
305
  """
@@ -314,9 +314,9 @@ class DatabricksFileSystem(AbstractFileSystem):
314
314
  self._send_to_api(method="post", endpoint="close", json={"handle": handle})
315
315
  except DatabricksException as e:
316
316
  if e.error_code == "RESOURCE_DOES_NOT_EXIST":
317
- raise FileNotFoundError(e.message)
317
+ raise FileNotFoundError(e.message) from e
318
318
 
319
- raise e
319
+ raise
320
320
 
321
321
  def _add_data(self, handle, data):
322
322
  """
@@ -342,11 +342,11 @@ class DatabricksFileSystem(AbstractFileSystem):
342
342
  )
343
343
  except DatabricksException as e:
344
344
  if e.error_code == "RESOURCE_DOES_NOT_EXIST":
345
- raise FileNotFoundError(e.message)
345
+ raise FileNotFoundError(e.message) from e
346
346
  elif e.error_code == "MAX_BLOCK_SIZE_EXCEEDED":
347
- raise ValueError(e.message)
347
+ raise ValueError(e.message) from e
348
348
 
349
- raise e
349
+ raise
350
350
 
351
351
  def _get_data(self, path, start, end):
352
352
  """
@@ -372,11 +372,11 @@ class DatabricksFileSystem(AbstractFileSystem):
372
372
  return base64.b64decode(r["data"])
373
373
  except DatabricksException as e:
374
374
  if e.error_code == "RESOURCE_DOES_NOT_EXIST":
375
- raise FileNotFoundError(e.message)
375
+ raise FileNotFoundError(e.message) from e
376
376
  elif e.error_code in ["INVALID_PARAMETER_VALUE", "MAX_READ_SIZE_EXCEEDED"]:
377
- raise ValueError(e.message)
377
+ raise ValueError(e.message) from e
378
378
 
379
- raise e
379
+ raise
380
380
 
381
381
  def invalidate_cache(self, path=None):
382
382
  if path is None:
@@ -64,9 +64,15 @@ class DirFileSystem(AsyncFileSystem):
64
64
  if isinstance(path, str):
65
65
  if not self.path:
66
66
  return path
67
- if path == self.path:
67
+ # We need to account for S3FileSystem returning paths that do not
68
+ # start with a '/'
69
+ if path == self.path or (
70
+ self.path.startswith(self.fs.sep) and path == self.path[1:]
71
+ ):
68
72
  return ""
69
73
  prefix = self.path + self.fs.sep
74
+ if self.path.startswith(self.fs.sep) and not path.startswith(self.fs.sep):
75
+ prefix = prefix[1:]
70
76
  assert path.startswith(prefix)
71
77
  return path[len(prefix) :]
72
78
  return [self._relpath(_path) for _path in path]
@@ -2,7 +2,7 @@ import os
2
2
  import sys
3
3
  import uuid
4
4
  import warnings
5
- from ftplib import FTP, Error, error_perm
5
+ from ftplib import FTP, FTP_TLS, Error, error_perm
6
6
  from typing import Any
7
7
 
8
8
  from ..spec import AbstractBufferedFile, AbstractFileSystem
@@ -27,6 +27,7 @@ class FTPFileSystem(AbstractFileSystem):
27
27
  tempdir=None,
28
28
  timeout=30,
29
29
  encoding="utf-8",
30
+ tls=False,
30
31
  **kwargs,
31
32
  ):
32
33
  """
@@ -56,28 +57,37 @@ class FTPFileSystem(AbstractFileSystem):
56
57
  Timeout of the ftp connection in seconds
57
58
  encoding: str
58
59
  Encoding to use for directories and filenames in FTP connection
60
+ tls: bool
61
+ Use FTP-TLS, by default False
59
62
  """
60
63
  super().__init__(**kwargs)
61
64
  self.host = host
62
65
  self.port = port
63
66
  self.tempdir = tempdir or "/tmp"
64
- self.cred = username, password, acct
67
+ self.cred = username or "", password or "", acct or ""
65
68
  self.timeout = timeout
66
69
  self.encoding = encoding
67
70
  if block_size is not None:
68
71
  self.blocksize = block_size
69
72
  else:
70
73
  self.blocksize = 2**16
74
+ self.tls = tls
71
75
  self._connect()
76
+ if self.tls:
77
+ self.ftp.prot_p()
72
78
 
73
79
  def _connect(self):
80
+ if self.tls:
81
+ ftp_cls = FTP_TLS
82
+ else:
83
+ ftp_cls = FTP
74
84
  if sys.version_info >= (3, 9):
75
- self.ftp = FTP(timeout=self.timeout, encoding=self.encoding)
85
+ self.ftp = ftp_cls(timeout=self.timeout, encoding=self.encoding)
76
86
  elif self.encoding:
77
87
  warnings.warn("`encoding` not supported for python<3.9, ignoring")
78
- self.ftp = FTP(timeout=self.timeout)
88
+ self.ftp = ftp_cls(timeout=self.timeout)
79
89
  else:
80
- self.ftp = FTP(timeout=self.timeout)
90
+ self.ftp = ftp_cls(timeout=self.timeout)
81
91
  self.ftp.connect(self.host, self.port)
82
92
  self.ftp.login(*self.cred)
83
93
 
@@ -107,9 +117,9 @@ class FTPFileSystem(AbstractFileSystem):
107
117
  except error_perm:
108
118
  out = _mlsd2(self.ftp, path) # Not platform independent
109
119
  for fn, details in out:
110
- if path == "/":
111
- path = "" # just for forming the names, below
112
- details["name"] = "/".join([path, fn.lstrip("/")])
120
+ details["name"] = "/".join(
121
+ ["" if path == "/" else path, fn.lstrip("/")]
122
+ )
113
123
  if details["type"] == "file":
114
124
  details["size"] = int(details["size"])
115
125
  else:
@@ -122,8 +132,8 @@ class FTPFileSystem(AbstractFileSystem):
122
132
  info = self.info(path)
123
133
  if info["type"] == "file":
124
134
  out = [(path, info)]
125
- except (Error, IndexError):
126
- raise FileNotFoundError(path)
135
+ except (Error, IndexError) as exc:
136
+ raise FileNotFoundError(path) from exc
127
137
  files = self.dircache.get(path, out)
128
138
  if not detail:
129
139
  return sorted([fn for fn, details in files])
@@ -137,9 +147,9 @@ class FTPFileSystem(AbstractFileSystem):
137
147
  return {"name": "/", "size": 0, "type": "directory"}
138
148
  files = self.ls(self._parent(path).lstrip("/"), True)
139
149
  try:
140
- out = [f for f in files if f["name"] == path][0]
141
- except IndexError:
142
- raise FileNotFoundError(path)
150
+ out = next(f for f in files if f["name"] == path)
151
+ except StopIteration as exc:
152
+ raise FileNotFoundError(path) from exc
143
153
  return out
144
154
 
145
155
  def get_file(self, rpath, lpath, **kwargs):
@@ -254,7 +254,7 @@ class HTTPFileSystem(AsyncFileSystem):
254
254
  if isfilelike(lpath):
255
255
  outfile = lpath
256
256
  else:
257
- outfile = open(lpath, "wb") # noqa: ASYNC101
257
+ outfile = open(lpath, "wb") # noqa: ASYNC101, ASYNC230
258
258
 
259
259
  try:
260
260
  chunk = True
@@ -282,7 +282,7 @@ class HTTPFileSystem(AsyncFileSystem):
282
282
  context = nullcontext(lpath)
283
283
  use_seek = False # might not support seeking
284
284
  else:
285
- context = open(lpath, "rb") # noqa: ASYNC101
285
+ context = open(lpath, "rb") # noqa: ASYNC101, ASYNC230
286
286
  use_seek = True
287
287
 
288
288
  with context as f:
@@ -805,7 +805,7 @@ async def get_range(session, url, start, end, file=None, **kwargs):
805
805
  async with r:
806
806
  out = await r.read()
807
807
  if file:
808
- with open(file, "r+b") as f: # noqa: ASYNC101
808
+ with open(file, "r+b") as f: # noqa: ASYNC101, ASYNC230
809
809
  f.seek(start)
810
810
  f.write(out)
811
811
  else:
@@ -79,6 +79,14 @@ class LocalFileSystem(AbstractFileSystem):
79
79
  t = "file"
80
80
  else:
81
81
  t = "other"
82
+
83
+ size = out.st_size
84
+ if link:
85
+ try:
86
+ out2 = path.stat(follow_symlinks=True)
87
+ size = out2.st_size
88
+ except OSError:
89
+ size = 0
82
90
  path = self._strip_protocol(path.path)
83
91
  else:
84
92
  # str or path-like
@@ -87,6 +95,7 @@ class LocalFileSystem(AbstractFileSystem):
87
95
  link = stat.S_ISLNK(out.st_mode)
88
96
  if link:
89
97
  out = os.stat(path, follow_symlinks=True)
98
+ size = out.st_size
90
99
  if stat.S_ISDIR(out.st_mode):
91
100
  t = "directory"
92
101
  elif stat.S_ISREG(out.st_mode):
@@ -95,20 +104,15 @@ class LocalFileSystem(AbstractFileSystem):
95
104
  t = "other"
96
105
  result = {
97
106
  "name": path,
98
- "size": out.st_size,
107
+ "size": size,
99
108
  "type": t,
100
109
  "created": out.st_ctime,
101
110
  "islink": link,
102
111
  }
103
112
  for field in ["mode", "uid", "gid", "mtime", "ino", "nlink"]:
104
113
  result[field] = getattr(out, f"st_{field}")
105
- if result["islink"]:
114
+ if link:
106
115
  result["destination"] = os.readlink(path)
107
- try:
108
- out2 = os.stat(path, follow_symlinks=True)
109
- result["size"] = out2.st_size
110
- except OSError:
111
- result["size"] = 0
112
116
  return result
113
117
 
114
118
  def lexists(self, path, **kwargs):
@@ -224,8 +224,8 @@ class MemoryFileSystem(AbstractFileSystem):
224
224
  path = self._strip_protocol(path)
225
225
  try:
226
226
  return bytes(self.store[path].getbuffer()[start:end])
227
- except KeyError:
228
- raise FileNotFoundError(path)
227
+ except KeyError as e:
228
+ raise FileNotFoundError(path) from e
229
229
 
230
230
  def _rm(self, path):
231
231
  path = self._strip_protocol(path)
@@ -238,15 +238,15 @@ class MemoryFileSystem(AbstractFileSystem):
238
238
  path = self._strip_protocol(path)
239
239
  try:
240
240
  return self.store[path].modified
241
- except KeyError:
242
- raise FileNotFoundError(path)
241
+ except KeyError as e:
242
+ raise FileNotFoundError(path) from e
243
243
 
244
244
  def created(self, path):
245
245
  path = self._strip_protocol(path)
246
246
  try:
247
247
  return self.store[path].created
248
- except KeyError:
249
- raise FileNotFoundError(path)
248
+ except KeyError as e:
249
+ raise FileNotFoundError(path) from e
250
250
 
251
251
  def rm(self, path, recursive=False, maxdepth=None):
252
252
  if isinstance(path, str):
@@ -5,6 +5,7 @@ import itertools
5
5
  import logging
6
6
  import math
7
7
  import os
8
+ from itertools import chain
8
9
  from functools import lru_cache
9
10
  from typing import TYPE_CHECKING
10
11
 
@@ -16,10 +17,10 @@ except ImportError:
16
17
  if not TYPE_CHECKING:
17
18
  import json
18
19
 
19
- from ..asyn import AsyncFileSystem
20
- from ..callbacks import DEFAULT_CALLBACK
21
- from ..core import filesystem, open, split_protocol
22
- from ..utils import isfilelike, merge_offset_ranges, other_paths
20
+ from fsspec.asyn import AsyncFileSystem
21
+ from fsspec.callbacks import DEFAULT_CALLBACK
22
+ from fsspec.core import filesystem, open, split_protocol
23
+ from fsspec.utils import isfilelike, merge_offset_ranges, other_paths
23
24
 
24
25
  logger = logging.getLogger("fsspec.reference")
25
26
 
@@ -35,7 +36,7 @@ class ReferenceNotReachable(RuntimeError):
35
36
 
36
37
 
37
38
  def _first(d):
38
- return list(d.values())[0]
39
+ return next(iter(d.values()))
39
40
 
40
41
 
41
42
  def _prot_in_references(path, references):
@@ -131,7 +132,6 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
131
132
  self.out_root = out_root or self.root
132
133
  self.cat_thresh = categorical_threshold
133
134
  self.cache_size = cache_size
134
- self.dirs = None
135
135
  self.url = self.root + "/{field}/refs.{record}.parq"
136
136
  # TODO: derive fs from `root`
137
137
  self.fs = fsspec.filesystem("file") if fs is None else fs
@@ -159,7 +159,7 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
159
159
  path = self.url.format(field=field, record=record)
160
160
  data = io.BytesIO(self.fs.cat_file(path))
161
161
  df = self.pd.read_parquet(data, engine="fastparquet")
162
- refs = {c: df[c].values for c in df.columns}
162
+ refs = {c: df[c].to_numpy() for c in df.columns}
163
163
  return refs
164
164
 
165
165
  self.open_refs = open_refs
@@ -195,32 +195,36 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
195
195
  fs.pipe("/".join([root, ".zmetadata"]), json.dumps(met).encode())
196
196
  return LazyReferenceMapper(root, fs, **kwargs)
197
197
 
198
- def listdir(self, basename=True):
198
+ @lru_cache()
199
+ def listdir(self):
199
200
  """List top-level directories"""
200
- # cache me?
201
- if self.dirs is None:
202
- dirs = [p.split("/", 1)[0] for p in self.zmetadata]
203
- self.dirs = {p for p in dirs if p and not p.startswith(".")}
204
- listing = self.dirs
205
- if basename:
206
- listing = [os.path.basename(path) for path in listing]
207
- return listing
201
+ dirs = (p.rsplit("/", 1)[0] for p in self.zmetadata if not p.startswith(".z"))
202
+ return set(dirs)
208
203
 
209
204
  def ls(self, path="", detail=True):
210
205
  """Shortcut file listings"""
211
- if not path:
212
- dirnames = self.listdir()
213
- others = set(
214
- [".zmetadata"]
215
- + [name for name in self.zmetadata if "/" not in name]
216
- + [name for name in self._items if "/" not in name]
217
- )
206
+ path = path.rstrip("/")
207
+ pathdash = path + "/" if path else ""
208
+ dirnames = self.listdir()
209
+ dirs = [
210
+ d
211
+ for d in dirnames
212
+ if d.startswith(pathdash) and "/" not in d.lstrip(pathdash)
213
+ ]
214
+ if dirs:
215
+ others = {
216
+ f
217
+ for f in chain(
218
+ [".zmetadata"],
219
+ (name for name in self.zmetadata),
220
+ (name for name in self._items),
221
+ )
222
+ if f.startswith(pathdash) and "/" not in f.lstrip(pathdash)
223
+ }
218
224
  if detail is False:
219
- others.update(dirnames)
225
+ others.update(dirs)
220
226
  return sorted(others)
221
- dirinfo = [
222
- {"name": name, "type": "directory", "size": 0} for name in dirnames
223
- ]
227
+ dirinfo = [{"name": name, "type": "directory", "size": 0} for name in dirs]
224
228
  fileinfo = [
225
229
  {
226
230
  "name": name,
@@ -234,10 +238,7 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
234
238
  for name in others
235
239
  ]
236
240
  return sorted(dirinfo + fileinfo, key=lambda s: s["name"])
237
- parts = path.split("/", 1)
238
- if len(parts) > 1:
239
- raise FileNotFoundError("Cannot list within directories right now")
240
- field = parts[0]
241
+ field = path
241
242
  others = set(
242
243
  [name for name in self.zmetadata if name.startswith(f"{path}/")]
243
244
  + [name for name in self._items if name.startswith(f"{path}/")]
@@ -291,8 +292,8 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
291
292
  # Chunk keys can be loaded from row group and cached in LRU cache
292
293
  try:
293
294
  refs = self.open_refs(field, record)
294
- except (ValueError, TypeError, FileNotFoundError):
295
- raise KeyError(key)
295
+ except (ValueError, TypeError, FileNotFoundError) as exc:
296
+ raise KeyError(key) from exc
296
297
  columns = ["path", "offset", "size", "raw"]
297
298
  selection = [refs[c][ri] if c in refs else None for c in columns]
298
299
  raw = selection[-1]
@@ -501,6 +502,7 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
501
502
  if k != ".zmetadata" and ".z" in k:
502
503
  self.zmetadata[k] = json.loads(self._items.pop(k))
503
504
  met = {"metadata": self.zmetadata, "record_size": self.record_size}
505
+ self._items.clear()
504
506
  self._items[".zmetadata"] = json.dumps(met).encode()
505
507
  self.fs.pipe(
506
508
  "/".join([base_url or self.out_root, ".zmetadata"]),
@@ -732,8 +734,8 @@ class ReferenceFileSystem(AsyncFileSystem):
732
734
  logger.debug(f"cat: {path}")
733
735
  try:
734
736
  part = self.references[path]
735
- except KeyError:
736
- raise FileNotFoundError(path)
737
+ except KeyError as exc:
738
+ raise FileNotFoundError(path) from exc
737
739
  if isinstance(part, str):
738
740
  part = part.encode()
739
741
  if isinstance(part, bytes):
@@ -995,9 +997,11 @@ class ReferenceFileSystem(AsyncFileSystem):
995
997
  out = {}
996
998
  for gen in gens:
997
999
  dimension = {
998
- k: v
999
- if isinstance(v, list)
1000
- else range(v.get("start", 0), v["stop"], v.get("step", 1))
1000
+ k: (
1001
+ v
1002
+ if isinstance(v, list)
1003
+ else range(v.get("start", 0), v["stop"], v.get("step", 1))
1004
+ )
1001
1005
  for k, v in gen["dimensions"].items()
1002
1006
  }
1003
1007
  products = (
@@ -1084,7 +1088,7 @@ class ReferenceFileSystem(AsyncFileSystem):
1084
1088
  if self.dircache:
1085
1089
  return path in self.dircache
1086
1090
  elif isinstance(self.references, LazyReferenceMapper):
1087
- return path in self.references.listdir("")
1091
+ return path in self.references.listdir()
1088
1092
  else:
1089
1093
  # this may be faster than building dircache for single calls, but
1090
1094
  # by looping will be slow for many calls; could cache it?
@@ -4,10 +4,12 @@ Windows Samba network shares by using package smbprotocol
4
4
  """
5
5
 
6
6
  import datetime
7
+ import re
7
8
  import uuid
8
9
  from stat import S_ISDIR, S_ISLNK
9
10
 
10
11
  import smbclient
12
+ import smbprotocol.exceptions
11
13
 
12
14
  from .. import AbstractFileSystem
13
15
  from ..utils import infer_storage_options
@@ -67,7 +69,9 @@ class SMBFileSystem(AbstractFileSystem):
67
69
  timeout=60,
68
70
  encrypt=None,
69
71
  share_access=None,
70
- register_session_retries=5,
72
+ register_session_retries=4,
73
+ register_session_retry_wait=1,
74
+ register_session_retry_factor=10,
71
75
  auto_mkdir=False,
72
76
  **kwargs,
73
77
  ):
@@ -103,6 +107,19 @@ class SMBFileSystem(AbstractFileSystem):
103
107
  - 'r': Allow other handles to be opened with read access.
104
108
  - 'w': Allow other handles to be opened with write access.
105
109
  - 'd': Allow other handles to be opened with delete access.
110
+ register_session_retries: int
111
+ Number of retries to register a session with the server. Retries are not performed
112
+ for authentication errors, as they are considered as invalid credentials and not network
113
+ issues. If set to negative value, no register attempts will be performed.
114
+ register_session_retry_wait: int
115
+ Time in seconds to wait between each retry. Number must be non-negative.
116
+ register_session_retry_factor: int
117
+ Base factor for the wait time between each retry. The wait time
118
+ is calculated using exponential function. For factor=1 all wait times
119
+ will be equal to `register_session_retry_wait`. For any number of retries,
120
+ the last wait time will be equal to `register_session_retry_wait` and for retries>1
121
+ the first wait time will be equal to `register_session_retry_wait / factor`.
122
+ Number must be equal to or greater than 1. Optimal factor is 10.
106
123
  auto_mkdir: bool
107
124
  Whether, when opening a file, the directory containing it should
108
125
  be created (if it doesn't already exist). This is assumed by pyarrow
@@ -118,6 +135,17 @@ class SMBFileSystem(AbstractFileSystem):
118
135
  self.temppath = kwargs.pop("temppath", "")
119
136
  self.share_access = share_access
120
137
  self.register_session_retries = register_session_retries
138
+ if register_session_retry_wait < 0:
139
+ raise ValueError(
140
+ "register_session_retry_wait must be a non-negative integer"
141
+ )
142
+ self.register_session_retry_wait = register_session_retry_wait
143
+ if register_session_retry_factor < 1:
144
+ raise ValueError(
145
+ "register_session_retry_factor must be a positive "
146
+ "integer equal to or greater than 1"
147
+ )
148
+ self.register_session_retry_factor = register_session_retry_factor
121
149
  self.auto_mkdir = auto_mkdir
122
150
  self._connect()
123
151
 
@@ -128,7 +156,26 @@ class SMBFileSystem(AbstractFileSystem):
128
156
  def _connect(self):
129
157
  import time
130
158
 
131
- for _ in range(self.register_session_retries):
159
+ if self.register_session_retries <= -1:
160
+ return
161
+
162
+ retried_errors = []
163
+
164
+ wait_time = self.register_session_retry_wait
165
+ n_waits = (
166
+ self.register_session_retries - 1
167
+ ) # -1 = No wait time after the last retry
168
+ factor = self.register_session_retry_factor
169
+
170
+ # Generate wait times for each retry attempt.
171
+ # Wait times are calculated using exponential function. For factor=1 all wait times
172
+ # will be equal to `wait`. For any number of retries the last wait time will be
173
+ # equal to `wait` and for retries>2 the first wait time will be equal to `wait / factor`.
174
+ wait_times = iter(
175
+ factor ** (n / n_waits - 1) * wait_time for n in range(0, n_waits + 1)
176
+ )
177
+
178
+ for attempt in range(self.register_session_retries + 1):
132
179
  try:
133
180
  smbclient.register_session(
134
181
  self.host,
@@ -138,9 +185,35 @@ class SMBFileSystem(AbstractFileSystem):
138
185
  encrypt=self.encrypt,
139
186
  connection_timeout=self.timeout,
140
187
  )
141
- break
142
- except Exception:
143
- time.sleep(0.1)
188
+ return
189
+ except (
190
+ smbprotocol.exceptions.SMBAuthenticationError,
191
+ smbprotocol.exceptions.LogonFailure,
192
+ ):
193
+ # These exceptions should not be repeated, as they clearly indicate
194
+ # that the credentials are invalid and not a network issue.
195
+ raise
196
+ except ValueError as exc:
197
+ if re.findall(r"\[Errno -\d+]", str(exc)):
198
+ # This exception is raised by the smbprotocol.transport:Tcp.connect
199
+ # and originates from socket.gaierror (OSError). These exceptions might
200
+ # be raised due to network instability. We will retry to connect.
201
+ retried_errors.append(exc)
202
+ else:
203
+ # All another ValueError exceptions should be raised, as they are not
204
+ # related to network issues.
205
+ raise
206
+ except Exception as exc:
207
+ # Save the exception and retry to connect. This except might be dropped
208
+ # in the future, once all exceptions suited for retry are identified.
209
+ retried_errors.append(exc)
210
+
211
+ if attempt < self.register_session_retries:
212
+ time.sleep(next(wait_times))
213
+
214
+ # Raise last exception to inform user about the connection issues.
215
+ # Note: Should we use ExceptionGroup to raise all exceptions?
216
+ raise retried_errors[-1]
144
217
 
145
218
  @classmethod
146
219
  def _strip_protocol(cls, path):
@@ -102,7 +102,7 @@ class WebHDFS(AbstractFileSystem):
102
102
  if self._cached:
103
103
  return
104
104
  super().__init__(**kwargs)
105
- self.url = f"{'https' if use_https else 'http'}://{host}:{port}/webhdfs/v1" # noqa
105
+ self.url = f"{'https' if use_https else 'http'}://{host}:{port}/webhdfs/v1"
106
106
  self.kerb = kerberos
107
107
  self.kerb_kwargs = kerb_kwargs or {}
108
108
  self.pars = {}
@@ -393,7 +393,7 @@ class WebHDFS(AbstractFileSystem):
393
393
  with self.open(tmp_fname, "wb") as rstream:
394
394
  shutil.copyfileobj(lstream, rstream)
395
395
  self.mv(tmp_fname, rpath)
396
- except BaseException: # noqa
396
+ except BaseException:
397
397
  with suppress(FileNotFoundError):
398
398
  self.rm(tmp_fname)
399
399
  raise
@@ -132,3 +132,45 @@ class ZipFileSystem(AbstractArchiveFileSystem):
132
132
  out.size = info["size"]
133
133
  out.name = info["name"]
134
134
  return out
135
+
136
+ def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs):
137
+ if maxdepth is not None and maxdepth < 1:
138
+ raise ValueError("maxdepth must be at least 1")
139
+
140
+ # Remove the leading slash, as the zip file paths are always
141
+ # given without a leading slash
142
+ path = path.lstrip("/")
143
+ path_parts = list(filter(lambda s: bool(s), path.split("/")))
144
+
145
+ def _matching_starts(file_path):
146
+ file_parts = filter(lambda s: bool(s), file_path.split("/"))
147
+ return all(a == b for a, b in zip(path_parts, file_parts))
148
+
149
+ self._get_dirs()
150
+
151
+ result = {}
152
+ # To match posix find, if an exact file name is given, we should
153
+ # return only that file
154
+ if path in self.dir_cache and self.dir_cache[path]["type"] == "file":
155
+ result[path] = self.dir_cache[path]
156
+ return result if detail else [path]
157
+
158
+ for file_path, file_info in self.dir_cache.items():
159
+ if not (path == "" or _matching_starts(file_path)):
160
+ continue
161
+
162
+ if file_info["type"] == "directory":
163
+ if withdirs:
164
+ if file_path not in result:
165
+ result[file_path.strip("/")] = file_info
166
+ continue
167
+
168
+ if file_path not in result:
169
+ result[file_path] = file_info if detail else None
170
+
171
+ if maxdepth:
172
+ path_depth = path.count("/")
173
+ result = {
174
+ k: v for k, v in result.items() if k.count("/") - path_depth < maxdepth
175
+ }
176
+ return result if detail else sorted(result)
fsspec/mapping.py CHANGED
@@ -153,10 +153,10 @@ class FSMap(MutableMapping):
153
153
  k = self._key_to_str(key)
154
154
  try:
155
155
  result = self.fs.cat(k)
156
- except self.missing_exceptions:
156
+ except self.missing_exceptions as exc:
157
157
  if default is not None:
158
158
  return default
159
- raise KeyError(key)
159
+ raise KeyError(key) from exc
160
160
  return result
161
161
 
162
162
  def pop(self, key, default=None):
@@ -184,8 +184,8 @@ class FSMap(MutableMapping):
184
184
  """Remove key"""
185
185
  try:
186
186
  self.fs.rm(self._key_to_str(key))
187
- except: # noqa: E722
188
- raise KeyError
187
+ except Exception as exc:
188
+ raise KeyError from exc
189
189
 
190
190
  def __contains__(self, key):
191
191
  """Does key exist in mapping?"""
fsspec/spec.py CHANGED
@@ -1892,7 +1892,7 @@ class AbstractBufferedFile(io.IOBase):
1892
1892
  self.offset = 0
1893
1893
  try:
1894
1894
  self._initiate_upload()
1895
- except: # noqa: E722
1895
+ except:
1896
1896
  self.closed = True
1897
1897
  raise
1898
1898
 
@@ -4,9 +4,9 @@ from hashlib import md5
4
4
  import pytest
5
5
 
6
6
  from fsspec.implementations.local import LocalFileSystem
7
- from fsspec.tests.abstract.copy import AbstractCopyTests # noqa
8
- from fsspec.tests.abstract.get import AbstractGetTests # noqa
9
- from fsspec.tests.abstract.put import AbstractPutTests # noqa
7
+ from fsspec.tests.abstract.copy import AbstractCopyTests # noqa: F401
8
+ from fsspec.tests.abstract.get import AbstractGetTests # noqa: F401
9
+ from fsspec.tests.abstract.put import AbstractPutTests # noqa: F401
10
10
 
11
11
 
12
12
  class BaseAbstractFixtures:
fsspec/utils.py CHANGED
@@ -427,10 +427,7 @@ def is_exception(obj: Any) -> bool:
427
427
 
428
428
 
429
429
  def isfilelike(f: Any) -> TypeGuard[IO[bytes]]:
430
- for attr in ["read", "close", "tell"]:
431
- if not hasattr(f, attr):
432
- return False
433
- return True
430
+ return all(hasattr(f, attr) for attr in ["read", "close", "tell"])
434
431
 
435
432
 
436
433
  def get_protocol(url: str) -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: fsspec
3
- Version: 2024.6.1
3
+ Version: 2024.9.0
4
4
  Summary: File-system specification
5
5
  Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
6
6
  Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
@@ -1,55 +1,55 @@
1
1
  fsspec/__init__.py,sha256=l9MJaNNV2d4wKpCtMvXDr55n92DkdrAayGy3F9ICjzk,1998
2
- fsspec/_version.py,sha256=bs5gFL9Mlwh1IvRh5hZawz_SleC88gaDg6jodp6omsQ,417
2
+ fsspec/_version.py,sha256=1O0P6wbwqtkyltAT0n4UISDOJbtOp9uOwvifb7ASk-U,417
3
3
  fsspec/archive.py,sha256=S__DzfZj-urAN3tp2W6jJ6YDiXG1fAl7FjvWUN73qIE,2386
4
- fsspec/asyn.py,sha256=AOd2SXH2YPCaQL5jA6IegYevdMFkAnGD7Seh9DC2gSE,36404
4
+ fsspec/asyn.py,sha256=MTe85f2Rmvwg-uhZbckpU_GemYYYSZ3AAj8Et9CCgmk,36390
5
5
  fsspec/caching.py,sha256=x6IEdxtR3cMDjy40sNHyawR2SLtNSahVuP5i_TImdso,31600
6
6
  fsspec/callbacks.py,sha256=BDIwLzK6rr_0V5ch557fSzsivCElpdqhXr5dZ9Te-EE,9210
7
7
  fsspec/compression.py,sha256=jCSUMJu-zSNyrusnHT0wKXgOd1tTJR6vM126i5SR5Zc,4865
8
8
  fsspec/config.py,sha256=LF4Zmu1vhJW7Je9Q-cwkRc3xP7Rhyy7Xnwj26Z6sv2g,4279
9
9
  fsspec/conftest.py,sha256=fVfx-NLrH_OZS1TIpYNoPzM7efEcMoL62reHOdYeFCA,1245
10
- fsspec/core.py,sha256=Iln37fNZqjjk5vaDGU_0WWuwOxN1iVsQ6sDmCmuEvrs,23681
10
+ fsspec/core.py,sha256=299qCp0H3w3e6zbiK5YOm3pJjxuPr4IYcK6Yg1Zgcos,23684
11
11
  fsspec/dircache.py,sha256=YzogWJrhEastHU7vWz-cJiJ7sdtLXFXhEpInGKd4EcM,2717
12
12
  fsspec/exceptions.py,sha256=pauSLDMxzTJMOjvX1WEUK0cMyFkrFxpWJsyFywav7A8,331
13
- fsspec/fuse.py,sha256=66amOa6wdIbS0DMhhfAPUoOB37HPorfXD1izV0prmTY,10145
13
+ fsspec/fuse.py,sha256=Q-3NOOyLqBfYa4Db5E19z_ZY36zzYHtIs1mOUasItBQ,10177
14
14
  fsspec/generic.py,sha256=AFbo-mHBt5QJV1Aplg5CJuUiiJ4bNQhcKRuwkZJdWac,13761
15
- fsspec/gui.py,sha256=k46F11VGBLlrliPj3XbxHKlVGByWoX67Ofmu9ijaPBQ,13929
15
+ fsspec/gui.py,sha256=xBnHL2-r0LVwhDAtnHoPpXts7jd4Z32peawCJiI-7lI,13975
16
16
  fsspec/json.py,sha256=65sQ0Y7mTj33u_Y4IId5up4abQ3bAel4E4QzbKMiQSg,3826
17
- fsspec/mapping.py,sha256=hSsiRo-dgAOj6oHf67bF3i11U4xREglXToHGUX4GhRY,8261
17
+ fsspec/mapping.py,sha256=CtD_GEmyYgXefQHndkxu7Zb_kbTS3mlFP2zIwlAoQTY,8289
18
18
  fsspec/parquet.py,sha256=ONG29Enesp0ToCH2bQ7zkpimnVIsZ2S4xCLj35-fY78,19455
19
19
  fsspec/registry.py,sha256=HVC-4HWDZnA6rycJwAu8F8ZXzON_85MTQVIyS6LOHxo,11320
20
- fsspec/spec.py,sha256=6rb-C3hTZLLtMGx2HDp37N_sZKs5RtYdcj8XOlHFi_c,69586
20
+ fsspec/spec.py,sha256=A48RUDL50AwSOGB1VT114GC1TY93SGlr3fkBO1Yp0Fk,69572
21
21
  fsspec/transaction.py,sha256=xliRG6U2Zf3khG4xcw9WiB-yAoqJSHEGK_VjHOdtgo0,2398
22
- fsspec/utils.py,sha256=8czEIoX4GpcC42WLGoy3t_EMeZjJE8e5rTpOT_nEPo0,22987
22
+ fsspec/utils.py,sha256=dVaokocjhMOnO3B1KmKlgxYqojQJyzb3mgIfaAaz8Pk,22941
23
23
  fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
- fsspec/implementations/arrow.py,sha256=Y4F_IwWXuJI1mRO_c0_PI5o-Wp58RLmoiH_s-x88w4M,8631
24
+ fsspec/implementations/arrow.py,sha256=721Dikne_lV_0tlgk9jyKmHL6W-5MT0h2LKGvOYQTPI,8623
25
25
  fsspec/implementations/cache_mapper.py,sha256=W4wlxyPxZbSp9ItJ0pYRVBMh6bw9eFypgP6kUYuuiI4,2421
26
26
  fsspec/implementations/cache_metadata.py,sha256=pcOJYcBQY5OaC7Yhw0F3wjg08QLYApGmoISCrbs59ks,8511
27
27
  fsspec/implementations/cached.py,sha256=t5atYATgjuABm-mUyReqjGqVyyP1XBSuROX92aMecxY,32826
28
28
  fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
29
29
  fsspec/implementations/data.py,sha256=LDLczxRh8h7x39Zjrd-GgzdQHr78yYxDlrv2C9Uxb5E,1658
30
- fsspec/implementations/dbfs.py,sha256=cix9OYUveuSOx5UO5uRUwNUkYqjzyY0fkKnca1kTgZ0,15014
31
- fsspec/implementations/dirfs.py,sha256=0H6k67e2lZgq3U4K64ao6894L4134COUCekc3PCwTq8,11488
32
- fsspec/implementations/ftp.py,sha256=rp6cTog8xqjDPlKdSLKcsyP7K593_ByMabxGbNSEpTo,11655
30
+ fsspec/implementations/dbfs.py,sha256=a0eNjLxyfFK7pbEa52U8K-PhNHukzdGVx1eLcVniaXY,15092
31
+ fsspec/implementations/dirfs.py,sha256=VPSJhy2wFZodY5BB-o1tkWGYecvU2EmbgubmX3lOwuw,11815
32
+ fsspec/implementations/ftp.py,sha256=VpJWnQscdEKRu4fzkCtuf3jD9A74mBaerS2ijUwZ-_I,11936
33
33
  fsspec/implementations/git.py,sha256=vKGI-Vd5q4H2RrvhebkPc9NwlfkZ980OUGhebeCw-M0,4034
34
34
  fsspec/implementations/github.py,sha256=eAn1kJ7VeWR6gVoVRLBYclF_rQDXSJU-xzMXpvPQWqs,8002
35
- fsspec/implementations/http.py,sha256=ymjMQTXW6-akqqEoEKpjf416JAzP9N4VhnWiNYbRklk,29665
35
+ fsspec/implementations/http.py,sha256=BjDJ72IoUCe_EHKc44J__sy8VHCOdc98gVEq27sWUk8,29695
36
36
  fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ibN4Pg,3811
37
37
  fsspec/implementations/libarchive.py,sha256=5_I2DiLXwQ1JC8x-K7jXu-tBwhO9dj7tFLnb0bTnVMQ,7102
38
- fsspec/implementations/local.py,sha256=qc68w69-I7zqVO8njv_s-THVImwICOqxyt-_2EK1VLg,15042
39
- fsspec/implementations/memory.py,sha256=-BpOVwaWyW2rDvxWIIcrZTNFAhvuG66VWeIM6vLwhkc,10134
40
- fsspec/implementations/reference.py,sha256=iDisTIZ8kIWG_FNSGaDf88RClywAwoF8yMgoVcxM4cY,44308
38
+ fsspec/implementations/local.py,sha256=DNBZhF9LYYTPR4PKedeWuk32Tztc9jlgXtGRFGX7nv4,15103
39
+ fsspec/implementations/memory.py,sha256=-0AedWR-jBaw2zamEuL4ku73lJQwRdp-Muia0u1j6pU,10170
40
+ fsspec/implementations/reference.py,sha256=43lG6cq9GP0JfMv_n_CyPznRcpEaWAaxG-rgeZt9BV4,44375
41
41
  fsspec/implementations/sftp.py,sha256=fMY9XZcmpjszQ2tCqO_TPaJesaeD_Dv7ptYzgUPGoO0,5631
42
- fsspec/implementations/smb.py,sha256=RcqCvVBPD3U0I0Rc31ns6HRhqKVDugjPQMDPVpvZSNg,11408
42
+ fsspec/implementations/smb.py,sha256=5fhu8h06nOLBPh2c48aT7WBRqh9cEcbIwtyu06wTjec,15236
43
43
  fsspec/implementations/tar.py,sha256=dam78Tp_CozybNqCY2JYgGBS3Uc9FuJUAT9oB0lolOs,4111
44
- fsspec/implementations/webhdfs.py,sha256=Wm7zr0iX3SZx5LtWfJIo-5rkIaoEoWq_Ev87NWbUgug,16721
45
- fsspec/implementations/zip.py,sha256=vc1fNz-yO8uWQ9bQUqBFYpTcgsfZQq9vDwwg4Aufs9Y,4417
46
- fsspec/tests/abstract/__init__.py,sha256=i1wcFixV6QhOwdoB24c8oXjzobISNqiKVz9kl2DvAY8,10028
44
+ fsspec/implementations/webhdfs.py,sha256=aet-AOfMoK91C3jNu5xBxK0Mu2iaAWiL9Xfu12KyjQI,16705
45
+ fsspec/implementations/zip.py,sha256=XoRukvrnJWngLbE8Exp2XCVf3SgSPmOqdeCqQ3NpSr0,6047
46
+ fsspec/tests/abstract/__init__.py,sha256=o3rQBCeTTTdji0OxKdTvBvwL7q78sEIh5J5-Q-If6z0,10046
47
47
  fsspec/tests/abstract/common.py,sha256=1GQwNo5AONzAnzZj0fWgn8NJPLXALehbsuGxS3FzWVU,4973
48
48
  fsspec/tests/abstract/copy.py,sha256=gU5-d97U3RSde35Vp4RxPY4rWwL744HiSrJ8IBOp9-8,19967
49
49
  fsspec/tests/abstract/get.py,sha256=vNR4HztvTR7Cj56AMo7_tx7TeYz1Jgr_2Wb8Lv-UiBY,20755
50
50
  fsspec/tests/abstract/mv.py,sha256=k8eUEBIrRrGMsBY5OOaDXdGnQUKGwDIfQyduB6YD3Ns,1982
51
51
  fsspec/tests/abstract/put.py,sha256=7aih17OKB_IZZh1Mkq1eBDIjobhtMQmI8x-Pw-S_aZk,21201
52
- fsspec-2024.6.1.dist-info/METADATA,sha256=ijt16ZAzPN9P0_1AU4zcKdiM18pkIyf1Gkr-IFXrlLw,11749
53
- fsspec-2024.6.1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
54
- fsspec-2024.6.1.dist-info/licenses/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
55
- fsspec-2024.6.1.dist-info/RECORD,,
52
+ fsspec-2024.9.0.dist-info/METADATA,sha256=Jgjl4t19VdvyQ81LzLYeXagQpe2Om5QJDQ4nDfKdGbc,11749
53
+ fsspec-2024.9.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
54
+ fsspec-2024.9.0.dist-info/licenses/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
55
+ fsspec-2024.9.0.dist-info/RECORD,,