fsspec 2023.6.0__py3-none-any.whl → 2023.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -150,7 +150,7 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
150
150
  """List top-level directories"""
151
151
  if self.dirs is None:
152
152
  dirs = [p.split("/", 1)[0] for p in self.zmetadata]
153
- self.dirs = set(sorted(p for p in dirs if p and not p.startswith(".")))
153
+ self.dirs = {p for p in dirs if p and not p.startswith(".")}
154
154
  listing = self.dirs
155
155
  if basename:
156
156
  listing = [os.path.basename(path) for path in listing]
@@ -381,17 +381,17 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
381
381
  raws[j] = kerchunk.df._proc_raw(data)
382
382
  # TODO: only save needed columns
383
383
  df = pd.DataFrame(
384
- dict(
385
- path=paths,
386
- offset=offsets,
387
- size=sizes,
388
- raw=raws,
389
- ),
384
+ {
385
+ "path": paths,
386
+ "offset": offsets,
387
+ "size": sizes,
388
+ "raw": raws,
389
+ },
390
390
  copy=False,
391
391
  )
392
392
  if df.path.count() / (df.path.nunique() or 1) > self.cat_thresh:
393
393
  df["path"] = df["path"].astype("category")
394
- object_encoding = dict(raw="bytes", path="utf8")
394
+ object_encoding = {"raw": "bytes", "path": "utf8"}
395
395
  has_nulls = ["path", "raw"]
396
396
 
397
397
  self.fs.mkdirs(f"{base_url or self.out_root}/{field}", exist_ok=True)
@@ -110,8 +110,12 @@ class SFTPFileSystem(AbstractFileSystem):
110
110
  "type": t,
111
111
  "uid": stat.st_uid,
112
112
  "gid": stat.st_gid,
113
- "time": datetime.datetime.utcfromtimestamp(stat.st_atime),
114
- "mtime": datetime.datetime.utcfromtimestamp(stat.st_mtime),
113
+ "time": datetime.datetime.fromtimestamp(
114
+ stat.st_atime, tz=datetime.timezone.utc
115
+ ),
116
+ "mtime": datetime.datetime.fromtimestamp(
117
+ stat.st_mtime, tz=datetime.timezone.utc
118
+ ),
115
119
  }
116
120
  if parent_path:
117
121
  out["name"] = "/".join([parent_path.rstrip("/"), stat.filename])
@@ -81,7 +81,7 @@ class SMBFileSystem(AbstractFileSystem):
81
81
  ----------
82
82
  host: str
83
83
  The remote server name/ip to connect to
84
- port: int
84
+ port: int or None
85
85
  Port to connect with. Usually 445, sometimes 139.
86
86
  username: str or None
87
87
  Username to connect with. Required if Kerberos auth is not being used.
@@ -114,12 +114,16 @@ class SMBFileSystem(AbstractFileSystem):
114
114
  self.share_access = share_access
115
115
  self._connect()
116
116
 
117
+ @property
118
+ def _port(self):
119
+ return 445 if self.port is None else self.port
120
+
117
121
  def _connect(self):
118
122
  smbclient.register_session(
119
123
  self.host,
120
124
  username=self.username,
121
125
  password=self.password,
122
- port=445 if self.port is None else self.port,
126
+ port=self._port,
123
127
  encrypt=self.encrypt,
124
128
  connection_timeout=self.timeout,
125
129
  )
@@ -139,23 +143,23 @@ class SMBFileSystem(AbstractFileSystem):
139
143
  def mkdir(self, path, create_parents=True, **kwargs):
140
144
  wpath = _as_unc_path(self.host, path)
141
145
  if create_parents:
142
- smbclient.makedirs(wpath, exist_ok=False, **kwargs)
146
+ smbclient.makedirs(wpath, exist_ok=False, port=self._port, **kwargs)
143
147
  else:
144
- smbclient.mkdir(wpath, **kwargs)
148
+ smbclient.mkdir(wpath, port=self._port, **kwargs)
145
149
 
146
150
  def makedirs(self, path, exist_ok=False):
147
151
  if _share_has_path(path):
148
152
  wpath = _as_unc_path(self.host, path)
149
- smbclient.makedirs(wpath, exist_ok=exist_ok)
153
+ smbclient.makedirs(wpath, exist_ok=exist_ok, port=self._port)
150
154
 
151
155
  def rmdir(self, path):
152
156
  if _share_has_path(path):
153
157
  wpath = _as_unc_path(self.host, path)
154
- smbclient.rmdir(wpath)
158
+ smbclient.rmdir(wpath, port=self._port)
155
159
 
156
160
  def info(self, path, **kwargs):
157
161
  wpath = _as_unc_path(self.host, path)
158
- stats = smbclient.stat(wpath, **kwargs)
162
+ stats = smbclient.stat(wpath, port=self._port, **kwargs)
159
163
  if S_ISDIR(stats.st_mode):
160
164
  stype = "directory"
161
165
  elif S_ISLNK(stats.st_mode):
@@ -176,18 +180,18 @@ class SMBFileSystem(AbstractFileSystem):
176
180
  def created(self, path):
177
181
  """Return the created timestamp of a file as a datetime.datetime"""
178
182
  wpath = _as_unc_path(self.host, path)
179
- stats = smbclient.stat(wpath)
180
- return datetime.datetime.utcfromtimestamp(stats.st_ctime)
183
+ stats = smbclient.stat(wpath, port=self._port)
184
+ return datetime.datetime.fromtimestamp(stats.st_ctime, tz=datetime.timezone.utc)
181
185
 
182
186
  def modified(self, path):
183
187
  """Return the modified timestamp of a file as a datetime.datetime"""
184
188
  wpath = _as_unc_path(self.host, path)
185
- stats = smbclient.stat(wpath)
186
- return datetime.datetime.utcfromtimestamp(stats.st_mtime)
189
+ stats = smbclient.stat(wpath, port=self._port)
190
+ return datetime.datetime.fromtimestamp(stats.st_mtime, tz=datetime.timezone.utc)
187
191
 
188
192
  def ls(self, path, detail=True, **kwargs):
189
193
  unc = _as_unc_path(self.host, path)
190
- listed = smbclient.listdir(unc, **kwargs)
194
+ listed = smbclient.listdir(unc, port=self._port, **kwargs)
191
195
  dirs = ["/".join([path.rstrip("/"), p]) for p in listed]
192
196
  if detail:
193
197
  dirs = [self.info(d) for d in dirs]
@@ -217,30 +221,37 @@ class SMBFileSystem(AbstractFileSystem):
217
221
  share_access = kwargs.pop("share_access", self.share_access)
218
222
  if "w" in mode and autocommit is False:
219
223
  temp = _as_temp_path(self.host, path, self.temppath)
220
- return SMBFileOpener(wpath, temp, mode, block_size=bls, **kwargs)
224
+ return SMBFileOpener(
225
+ wpath, temp, mode, port=self._port, block_size=bls, **kwargs
226
+ )
221
227
  return smbclient.open_file(
222
- wpath, mode, buffering=bls, share_access=share_access, **kwargs
228
+ wpath,
229
+ mode,
230
+ buffering=bls,
231
+ share_access=share_access,
232
+ port=self._port,
233
+ **kwargs,
223
234
  )
224
235
 
225
236
  def copy(self, path1, path2, **kwargs):
226
237
  """Copy within two locations in the same filesystem"""
227
238
  wpath1 = _as_unc_path(self.host, path1)
228
239
  wpath2 = _as_unc_path(self.host, path2)
229
- smbclient.copyfile(wpath1, wpath2, **kwargs)
240
+ smbclient.copyfile(wpath1, wpath2, port=self._port, **kwargs)
230
241
 
231
242
  def _rm(self, path):
232
243
  if _share_has_path(path):
233
244
  wpath = _as_unc_path(self.host, path)
234
- stats = smbclient.stat(wpath)
245
+ stats = smbclient.stat(wpath, port=self._port)
235
246
  if S_ISDIR(stats.st_mode):
236
- smbclient.rmdir(wpath)
247
+ smbclient.rmdir(wpath, port=self._port)
237
248
  else:
238
- smbclient.remove(wpath)
249
+ smbclient.remove(wpath, port=self._port)
239
250
 
240
251
  def mv(self, path1, path2, **kwargs):
241
252
  wpath1 = _as_unc_path(self.host, path1)
242
253
  wpath2 = _as_unc_path(self.host, path2)
243
- smbclient.rename(wpath1, wpath2, **kwargs)
254
+ smbclient.rename(wpath1, wpath2, port=self._port, **kwargs)
244
255
 
245
256
 
246
257
  def _as_unc_path(host, path):
@@ -266,7 +277,7 @@ def _share_has_path(path):
266
277
  class SMBFileOpener(object):
267
278
  """writes to remote temporary file, move on commit"""
268
279
 
269
- def __init__(self, path, temp, mode, block_size=-1, **kwargs):
280
+ def __init__(self, path, temp, mode, port=445, block_size=-1, **kwargs):
270
281
  self.path = path
271
282
  self.temp = temp
272
283
  self.mode = mode
@@ -274,22 +285,27 @@ class SMBFileOpener(object):
274
285
  self.kwargs = kwargs
275
286
  self.smbfile = None
276
287
  self._incontext = False
288
+ self.port = port
277
289
  self._open()
278
290
 
279
291
  def _open(self):
280
292
  if self.smbfile is None or self.smbfile.closed:
281
293
  self.smbfile = smbclient.open_file(
282
- self.temp, self.mode, buffering=self.block_size, **self.kwargs
294
+ self.temp,
295
+ self.mode,
296
+ port=self.port,
297
+ buffering=self.block_size,
298
+ **self.kwargs,
283
299
  )
284
300
 
285
301
  def commit(self):
286
302
  """Move temp file to definitive on success."""
287
303
  # TODO: use transaction support in SMB protocol
288
- smbclient.replace(self.temp, self.path)
304
+ smbclient.replace(self.temp, self.path, port=self.port)
289
305
 
290
306
  def discard(self):
291
307
  """Remove the temp file on failure."""
292
- smbclient.remove(self.temp)
308
+ smbclient.remove(self.temp, port=self.port)
293
309
 
294
310
  def __fspath__(self):
295
311
  return self.path
fsspec/mapping.py CHANGED
@@ -2,6 +2,7 @@ import array
2
2
  import posixpath
3
3
  import warnings
4
4
  from collections.abc import MutableMapping
5
+ from functools import cached_property
5
6
 
6
7
  from .core import url_to_fs
7
8
 
@@ -59,6 +60,13 @@ class FSMap(MutableMapping):
59
60
  self.fs.touch(root + "/a")
60
61
  self.fs.rm(root + "/a")
61
62
 
63
+ @cached_property
64
+ def dirfs(self):
65
+ """dirfs instance that can be used with the same keys as the mapper"""
66
+ from .implementations.dirfs import DirFileSystem
67
+
68
+ return DirFileSystem(path=self._root_key_to_str, fs=self.fs)
69
+
62
70
  def clear(self):
63
71
  """Remove all keys below root - empties out mapping"""
64
72
  try:
fsspec/registry.py CHANGED
@@ -119,6 +119,10 @@ known_implementations = {
119
119
  "class": "ocifs.OCIFileSystem",
120
120
  "err": "Install ocifs to access OCI Object Storage",
121
121
  },
122
+ "ocilake": {
123
+ "class": "ocifs.OCIFileSystem",
124
+ "err": "Install ocifs to access OCI Data Lake",
125
+ },
122
126
  "asynclocal": {
123
127
  "class": "morefs.asyn_local.AsyncLocalFileSystem",
124
128
  "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
@@ -200,6 +204,10 @@ known_implementations = {
200
204
  "class": "boxfs.BoxFileSystem",
201
205
  "err": "Please install boxfs to access BoxFileSystem",
202
206
  },
207
+ "lakefs": {
208
+ "class": "lakefs_spec.LakeFSFileSystem",
209
+ "err": "Please install lakefs-spec to access LakeFSFileSystem",
210
+ },
203
211
  }
204
212
 
205
213
 
@@ -233,6 +241,14 @@ def get_filesystem_class(protocol):
233
241
  return cls
234
242
 
235
243
 
244
+ s3_msg = """Your installed version of s3fs is very old and known to cause
245
+ severe performance issues, see also https://github.com/dask/dask/issues/10276
246
+
247
+ To fix, you should specify a lower version bound on s3fs, or
248
+ update the current installation.
249
+ """
250
+
251
+
236
252
  def _import_class(cls, minv=None):
237
253
  """Take a string FQP and return the imported class or identifier
238
254
 
@@ -240,13 +256,19 @@ def _import_class(cls, minv=None):
240
256
  """
241
257
  if ":" in cls:
242
258
  mod, name = cls.rsplit(":", 1)
259
+ s3 = mod == "s3fs"
243
260
  mod = importlib.import_module(mod)
261
+ if s3 and mod.__version__.split(".") < ["0", "5"]:
262
+ warnings.warn(s3_msg)
244
263
  for part in name.split("."):
245
264
  mod = getattr(mod, part)
246
265
  return mod
247
266
  else:
248
267
  mod, name = cls.rsplit(".", 1)
268
+ s3 = mod == "s3fs"
249
269
  mod = importlib.import_module(mod)
270
+ if s3 and mod.__version__.split(".") < ["0", "5"]:
271
+ warnings.warn(s3_msg)
250
272
  return getattr(mod, name)
251
273
 
252
274