fsspec 2024.9.0__py3-none-any.whl → 2024.12.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.
fsspec/implementations/reference.py CHANGED
@@ -5,9 +5,9 @@ import itertools
 import logging
 import math
 import os
-from itertools import chain
 from functools import lru_cache
-from typing import TYPE_CHECKING
+from itertools import chain
+from typing import TYPE_CHECKING, Literal
 
 import fsspec.core
 
@@ -20,6 +20,7 @@ except ImportError:
 from fsspec.asyn import AsyncFileSystem
 from fsspec.callbacks import DEFAULT_CALLBACK
 from fsspec.core import filesystem, open, split_protocol
+from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper
 from fsspec.utils import isfilelike, merge_offset_ranges, other_paths
 
 logger = logging.getLogger("fsspec.reference")
@@ -41,7 +42,7 @@ def _first(d):
 
 def _prot_in_references(path, references):
     ref = references.get(path)
-    if isinstance(ref, (list, tuple)):
+    if isinstance(ref, (list, tuple)) and isinstance(ref[0], str):
         return split_protocol(ref[0])[0] if ref[0] else ref[0]
 
 
@@ -104,7 +105,13 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
         return pd
 
     def __init__(
-        self, root, fs=None, out_root=None, cache_size=128, categorical_threshold=10
+        self,
+        root,
+        fs=None,
+        out_root=None,
+        cache_size=128,
+        categorical_threshold=10,
+        engine: Literal["fastparquet", "pyarrow"] = "fastparquet",
     ):
         """
 
@@ -126,16 +133,25 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
             Encode urls as pandas.Categorical to reduce memory footprint if the ratio
             of the number of unique urls to total number of refs for each variable
             is greater than or equal to this number. (default 10)
+        engine: Literal["fastparquet","pyarrow"]
+            Engine choice for reading parquet files. (default is "fastparquet")
         """
+
         self.root = root
         self.chunk_sizes = {}
         self.out_root = out_root or self.root
         self.cat_thresh = categorical_threshold
+        self.engine = engine
         self.cache_size = cache_size
         self.url = self.root + "/{field}/refs.{record}.parq"
         # TODO: derive fs from `root`
         self.fs = fsspec.filesystem("file") if fs is None else fs
 
+        from importlib.util import find_spec
+
+        if self.engine == "pyarrow" and find_spec("pyarrow") is None:
+            raise ImportError("engine choice `pyarrow` is not installed.")
+
     def __getattr__(self, item):
         if item in ("_items", "record_size", "zmetadata"):
             self.setup()
@@ -158,8 +174,11 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
             """cached parquet file loader"""
             path = self.url.format(field=field, record=record)
             data = io.BytesIO(self.fs.cat_file(path))
-            df = self.pd.read_parquet(data, engine="fastparquet")
-            refs = {c: df[c].to_numpy() for c in df.columns}
+            try:
+                df = self.pd.read_parquet(data, engine=self.engine)
+                refs = {c: df[c].to_numpy() for c in df.columns}
+            except OSError:
+                refs = None
             return refs
 
         self.open_refs = open_refs
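
The `engine` argument selects the parquet backend for both this cached reader and the writer further down. A minimal usage sketch; the references directory is illustrative only:

```python
from fsspec.implementations.reference import LazyReferenceMapper

# With engine="pyarrow", __init__ now fails fast with ImportError when
# pyarrow is missing; the default remains "fastparquet".
refs = LazyReferenceMapper("/tmp/refs", engine="pyarrow")
```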
@@ -413,7 +432,7 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
         if len(partition) < self.record_size:
             try:
                 original = self.open_refs(field, record)
-            except IOError:
+            except OSError:
                 pass
 
             if original:
@@ -463,18 +482,28 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
 
         fn = f"{base_url or self.out_root}/{field}/refs.{record}.parq"
         self.fs.mkdirs(f"{base_url or self.out_root}/{field}", exist_ok=True)
+
+        if self.engine == "pyarrow":
+            df_backend_kwargs = {"write_statistics": False}
+        elif self.engine == "fastparquet":
+            df_backend_kwargs = {
+                "stats": False,
+                "object_encoding": object_encoding,
+                "has_nulls": has_nulls,
+            }
+        else:
+            raise NotImplementedError(f"{self.engine} not supported")
+
         df.to_parquet(
             fn,
-            engine="fastparquet",
+            engine=self.engine,
             storage_options=storage_options
             or getattr(self.fs, "storage_options", None),
             compression="zstd",
             index=False,
-            stats=False,
-            object_encoding=object_encoding,
-            has_nulls=has_nulls,
-            # **kwargs,
+            **df_backend_kwargs,
         )
+
         partition.clear()
         self._items.pop((field, record))
 
@@ -486,6 +515,7 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
         base_url: str
             Location of the output
         """
+
         # write what we have so far and clear sub chunks
         for thing in list(self._items):
             if isinstance(thing, tuple):
@@ -728,6 +758,10 @@ class ReferenceFileSystem(AsyncFileSystem):
             self.fss[remote_protocol] = fs
 
         self.fss[None] = fs or filesystem("file")  # default one
+        # Wrap any non-async filesystems to ensure async methods are available below
+        for k, f in self.fss.items():
+            if not f.async_impl:
+                self.fss[k] = AsyncFileSystemWrapper(f)
 
     def _cat_common(self, path, start=None, end=None):
         path = self._strip_protocol(path)
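
With every entry in `self.fss` guaranteed to be async, `ReferenceFileSystem` can call `_cat_file` and friends uniformly. A sketch of the wrapper on its own, assuming the import path shown above, with the local filesystem as the sync backend:

```python
import asyncio
import os
import tempfile

from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper
from fsspec.implementations.local import LocalFileSystem

async def main():
    sync_fs = LocalFileSystem()
    assert not sync_fs.async_impl
    async_fs = AsyncFileSystemWrapper(sync_fs)

    # The wrapper exposes coroutine variants (_cat_file, _pipe_file, ...)
    # that run the sync implementation off the event loop.
    target = os.path.join(tempfile.mkdtemp(), "demo.bin")
    await async_fs._pipe_file(target, b"hello")
    assert await async_fs._cat_file(target) == b"hello"

asyncio.run(main())
```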
@@ -777,7 +811,9 @@
             return part_or_url[start:end]
         protocol, _ = split_protocol(part_or_url)
         try:
-            await self.fss[protocol]._cat_file(part_or_url, start=start, end=end)
+            return await self.fss[protocol]._cat_file(
+                part_or_url, start=start0, end=end0
+            )
         except Exception as e:
             raise ReferenceNotReachable(path, part_or_url) from e
 
@@ -845,6 +881,9 @@
             # found and on_error is "raise"
             try:
                 u, s, e = self._cat_common(p)
+                if not isinstance(u, (bytes, str)):
+                    # nan/None from parquet
+                    continue
             except FileNotFoundError as err:
                 if on_error == "raise":
                     raise
@@ -1147,13 +1186,17 @@
         )  # ignores FileNotFound, just as well for directories
         self.dircache.clear()  # this is a bit heavy handed
 
-    async def _pipe_file(self, path, data):
+    async def _pipe_file(self, path, data, mode="overwrite", **kwargs):
+        if mode == "create" and self.exists(path):
+            raise FileExistsError
         # can be str or bytes
         self.references[path] = data
         self.dircache.clear()  # this is a bit heavy handed
 
-    async def _put_file(self, lpath, rpath, **kwargs):
+    async def _put_file(self, lpath, rpath, mode="overwrite", **kwargs):
         # puts binary
+        if mode == "create" and self.exists(rpath):
+            raise FileExistsError
         with open(lpath, "rb") as f:
             self.references[rpath] = f.read()
         self.dircache.clear()  # this is a bit heavy handed
fsspec/implementations/webhdfs.py CHANGED
@@ -166,7 +166,8 @@ class WebHDFS(AbstractFileSystem):
             self.session.auth = HTTPBasicAuth(self.user, self.password)
 
     def _call(self, op, method="get", path=None, data=None, redirect=True, **kwargs):
-        url = self._apply_proxy(self.url + quote(path or "", safe="/="))
+        path = self._strip_protocol(path) if path is not None else ""
+        url = self._apply_proxy(self.url + quote(path, safe="/="))
         args = kwargs.copy()
         args.update(self.pars)
         args["op"] = op.upper()
fsspec/implementations/zip.py CHANGED
@@ -1,3 +1,4 @@
+import os
 import zipfile
 
 import fsspec
@@ -48,7 +49,7 @@ class ZipFileSystem(AbstractArchiveFileSystem):
         if mode not in set("rwa"):
             raise ValueError(f"mode '{mode}' no understood")
         self.mode = mode
-        if isinstance(fo, str):
+        if isinstance(fo, (str, os.PathLike)):
             if mode == "a":
                 m = "r+b"
             else:
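
`ZipFileSystem` now accepts any `os.PathLike`, not just `str`. A short sketch (the archive location is illustrative):

```python
import pathlib
import tempfile
import zipfile

from fsspec.implementations.zip import ZipFileSystem

archive = pathlib.Path(tempfile.mkdtemp()) / "demo.zip"
with zipfile.ZipFile(archive, "w") as z:
    z.writestr("a.txt", "hello")

# Previously this required str(archive); a pathlib.Path now works directly.
fs = ZipFileSystem(archive)
assert fs.cat("a.txt") == b"hello"
```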
fsspec/mapping.py CHANGED
@@ -112,7 +112,7 @@ class FSMap(MutableMapping):
             for k, v in out.items()
         }
         return {
-            key: out[k2]
+            key: out[k2] if on_error == "raise" else out.get(k2, KeyError(k2))
             for key, k2 in zip(keys, keys2)
             if on_error == "return" or not isinstance(out[k2], BaseException)
         }
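
With this change, a key absent from the backend's response surfaces as a `KeyError` value under `on_error="return"` rather than breaking the dict comprehension. A sketch on the in-memory filesystem:

```python
import fsspec

m = fsspec.get_mapper("memory://getitems-demo")
m["a"] = b"1"

out = m.getitems(["a", "missing"], on_error="return")
assert out["a"] == b"1"
assert isinstance(out["missing"], KeyError)  # returned, not raised
```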
fsspec/parquet.py CHANGED
@@ -336,7 +336,7 @@ def _add_header_magic(data):
     # Add b"PAR1" to file headers
     for path in list(data.keys()):
         add_magic = True
-        for k in data[path].keys():
+        for k in data[path]:
            if k[0] == 0 and k[1] >= 4:
                add_magic = False
                break
fsspec/registry.py CHANGED
@@ -202,6 +202,10 @@ known_implementations = {
         "err": 'SFTPFileSystem requires "paramiko" to be installed',
     },
     "tar": {"class": "fsspec.implementations.tar.TarFileSystem"},
+    "tosfs": {
+        "class": "tosfs.TosFileSystem",
+        "err": "Install tosfs to access ByteDance volcano engine Tinder Object Storage",
+    },
     "wandb": {"class": "wandbfs.WandbFS", "err": "Install wandbfs to access wandb"},
     "webdav": {
         "class": "webdav4.fsspec.WebdavFileSystem",
fsspec/spec.py CHANGED
fsspec/spec.py CHANGED (continued)
@@ -10,7 +10,7 @@ import weakref
 from errno import ESPIPE
 from glob import has_magic
 from hashlib import sha256
-from typing import Any, ClassVar, Dict, Tuple
+from typing import Any, ClassVar
 
 from .callbacks import DEFAULT_CALLBACK
 from .config import apply_config, conf
@@ -117,8 +117,8 @@ class AbstractFileSystem(metaclass=_Cached):
     _extra_tokenize_attributes = ()
 
     # Set by _Cached metaclass
-    storage_args: Tuple[Any, ...]
-    storage_options: Dict[str, Any]
+    storage_args: tuple[Any, ...]
+    storage_options: dict[str, Any]
 
     def __init__(self, *args, **storage_options):
         """Create and configure file-system instance
@@ -408,7 +408,7 @@ class AbstractFileSystem(metaclass=_Cached):
         topdown: bool (True)
             Whether to walk the directory tree from the top downwards or from
             the bottom upwards.
-        on_error: "omit", "raise", a collable
+        on_error: "omit", "raise", a callable
             if omit (default), path with exception will simply be empty;
             If raise, an underlying exception will be raised;
             if callable, it will be called with a single OSError instance as argument
@@ -428,11 +428,9 @@ class AbstractFileSystem(metaclass=_Cached):
         except (FileNotFoundError, OSError) as e:
             if on_error == "raise":
                 raise
-            elif callable(on_error):
+            if callable(on_error):
                 on_error(e)
-            if detail:
-                return path, {}, {}
-            return path, [], []
+            return
 
         for info in listing:
             # each info name must be at least [path]/part , but here
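
Behavioral note: on a listing error, `walk` now yields nothing for the failing directory instead of yielding it with empty children. A sketch that observes the new behavior (the path is deliberately nonexistent):

```python
import fsspec

fs = fsspec.filesystem("memory")
errors = []

# Formerly this yielded ("/no/such/dir", [], []); now the generator is empty
# and the callable receives the underlying FileNotFoundError.
assert list(fs.walk("/no/such/dir", on_error=errors.append)) == []
assert len(errors) == 1
```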
@@ -617,11 +615,9 @@ class AbstractFileSystem(metaclass=_Cached):
             p: info
             for p, info in sorted(allpaths.items())
             if pattern.match(
-                (
-                    p + "/"
-                    if append_slash_to_dirname and info["type"] == "directory"
-                    else p
-                )
+                p + "/"
+                if append_slash_to_dirname and info["type"] == "directory"
+                else p
             )
         }
 
@@ -650,7 +646,7 @@ class AbstractFileSystem(metaclass=_Cached):
         Returns a single dictionary, with exactly the same information as ``ls``
         would with ``detail=True``.
 
-        The default implementation should calls ls and could be overridden by a
+        The default implementation calls ls and could be overridden by a
         shortcut. kwargs are passed on to ```ls()``.
 
         Some file systems might not be able to measure the file's size, in
@@ -782,8 +778,12 @@ class AbstractFileSystem(metaclass=_Cached):
             return f.read(end - f.tell())
         return f.read()
 
-    def pipe_file(self, path, value, **kwargs):
+    def pipe_file(self, path, value, mode="overwrite", **kwargs):
         """Set the bytes of given file"""
+        if mode == "create" and self.exists(path):
+            # non-atomic but simple way; or could use "xb" in open(), which is likely
+            # not as well supported
+            raise FileExistsError
         with self.open(path, "wb", **kwargs) as f:
             f.write(value)
 
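The new `mode` keyword gives `pipe_file` create-or-fail semantics without requiring the backend to support exclusive opens. A sketch on the in-memory filesystem:

```python
import fsspec

fs = fsspec.filesystem("memory")
fs.pipe_file("/demo.txt", b"v1")   # mode="overwrite" is the default
fs.pipe_file("/demo.txt", b"v2")   # so re-piping still succeeds

try:
    fs.pipe_file("/demo.txt", b"v3", mode="create")
except FileExistsError:
    print("refused to clobber existing file")
```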
@@ -975,8 +975,12 @@ class AbstractFileSystem(metaclass=_Cached):
         with callback.branched(rpath, lpath) as child:
             self.get_file(rpath, lpath, callback=child, **kwargs)
 
-    def put_file(self, lpath, rpath, callback=DEFAULT_CALLBACK, **kwargs):
+    def put_file(
+        self, lpath, rpath, callback=DEFAULT_CALLBACK, mode="overwrite", **kwargs
+    ):
         """Copy single file to remote"""
+        if mode == "create" and self.exists(rpath):
+            raise FileExistsError
         if os.path.isdir(lpath):
             self.makedirs(rpath, exist_ok=True)
             return None
@@ -1266,6 +1270,9 @@ class AbstractFileSystem(metaclass=_Cached):
             Target file
         mode: str like 'rb', 'w'
             See builtin ``open()``
+            Mode "x" (exclusive write) may be implemented by the backend. Even if
+            it is, whether it is checked up front or on commit, and whether it is
+            atomic is implementation-dependent.
         block_size: int
             Some indication of buffering - this is a value in bytes
         cache_options : dict, optional
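
Paired with the `"xb"` mode accepted by `AbstractBufferedFile` below, exclusive open can be exercised directly; the local backend defers to Python's built-in `open`, so the check is atomic there. Target path is illustrative:

```python
import os
import tempfile

import fsspec

fs = fsspec.filesystem("file")
target = os.path.join(tempfile.mkdtemp(), "excl.bin")

with fs.open(target, "xb") as f:  # succeeds: the file does not exist yet
    f.write(b"data")

try:
    fs.open(target, "xb")         # a second exclusive open must fail
except FileExistsError:
    print("already exists")
```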
@@ -1444,7 +1451,7 @@ class AbstractFileSystem(metaclass=_Cached):
 
         return json.loads(blob, cls=FilesystemJSONDecoder)
 
-    def to_dict(self, *, include_password: bool = True) -> Dict[str, Any]:
+    def to_dict(self, *, include_password: bool = True) -> dict[str, Any]:
         """
         JSON-serializable dictionary representation of this filesystem instance.
 
@@ -1485,7 +1492,7 @@ class AbstractFileSystem(metaclass=_Cached):
         )
 
     @staticmethod
-    def from_dict(dct: Dict[str, Any]) -> AbstractFileSystem:
+    def from_dict(dct: dict[str, Any]) -> AbstractFileSystem:
         """
         Recreate a filesystem instance from dictionary representation.
 
@@ -1569,6 +1576,141 @@ class AbstractFileSystem(metaclass=_Cached):
         """Return the modified timestamp of a file as a datetime.datetime"""
         raise NotImplementedError
 
+    def tree(
+        self,
+        path: str = "/",
+        recursion_limit: int = 2,
+        max_display: int = 25,
+        display_size: bool = False,
+        prefix: str = "",
+        is_last: bool = True,
+        first: bool = True,
+        indent_size: int = 4,
+    ) -> str:
+        """
+        Return a tree-like structure of the filesystem starting from the given path as a string.
+
+        Parameters
+        ----------
+        path: Root path to start traversal from
+        recursion_limit: Maximum depth of directory traversal
+        max_display: Maximum number of items to display per directory
+        display_size: Whether to display file sizes
+        prefix: Current line prefix for visual tree structure
+        is_last: Whether current item is last in its level
+        first: Whether this is the first call (displays root path)
+        indent_size: Number of spaces by indent
+
+        Returns
+        -------
+        str: A string representing the tree structure.
+
+        Example
+        -------
+        >>> from fsspec import filesystem
+
+        >>> fs = filesystem('ftp', host='test.rebex.net', user='demo', password='password')
+        >>> tree = fs.tree(display_size=True, recursion_limit=3, indent_size=8, max_display=10)
+        >>> print(tree)
+        """
+
+        def format_bytes(n: int) -> str:
+            """Format bytes as text."""
+            for prefix, k in (
+                ("P", 2**50),
+                ("T", 2**40),
+                ("G", 2**30),
+                ("M", 2**20),
+                ("k", 2**10),
+            ):
+                if n >= 0.9 * k:
+                    return f"{n / k:.2f} {prefix}b"
+            return f"{n}B"
+
+        result = []
+
+        if first:
+            result.append(path)
+
+        if recursion_limit:
+            indent = " " * indent_size
+            contents = self.ls(path, detail=True)
+            contents.sort(
+                key=lambda x: (x.get("type") != "directory", x.get("name", ""))
+            )
+
+            if max_display is not None and len(contents) > max_display:
+                displayed_contents = contents[:max_display]
+                remaining_count = len(contents) - max_display
+            else:
+                displayed_contents = contents
+                remaining_count = 0
+
+            for i, item in enumerate(displayed_contents):
+                is_last_item = (i == len(displayed_contents) - 1) and (
+                    remaining_count == 0
+                )
+
+                branch = (
+                    "└" + ("─" * (indent_size - 2))
+                    if is_last_item
+                    else "├" + ("─" * (indent_size - 2))
+                )
+                branch += " "
+                new_prefix = prefix + (
+                    indent if is_last_item else "│" + " " * (indent_size - 1)
+                )
+
+                name = os.path.basename(item.get("name", ""))
+
+                if display_size and item.get("type") == "directory":
+                    sub_contents = self.ls(item.get("name", ""), detail=True)
+                    num_files = sum(
+                        1 for sub_item in sub_contents if sub_item.get("type") == "file"
+                    )
+                    num_folders = sum(
+                        1
+                        for sub_item in sub_contents
+                        if sub_item.get("type") == "directory"
+                    )
+
+                    if num_files == 0 and num_folders == 0:
+                        size = " (empty folder)"
+                    elif num_files == 0:
+                        size = f" ({num_folders} subfolder{'s' if num_folders > 1 else ''})"
+                    elif num_folders == 0:
+                        size = f" ({num_files} file{'s' if num_files > 1 else ''})"
+                    else:
+                        size = f" ({num_files} file{'s' if num_files > 1 else ''}, {num_folders} subfolder{'s' if num_folders > 1 else ''})"
+                elif display_size and item.get("type") == "file":
+                    size = f" ({format_bytes(item.get('size', 0))})"
+                else:
+                    size = ""
+
+                result.append(f"{prefix}{branch}{name}{size}")
+
+                if item.get("type") == "directory" and recursion_limit > 0:
+                    result.append(
+                        self.tree(
+                            path=item.get("name", ""),
+                            recursion_limit=recursion_limit - 1,
+                            max_display=max_display,
+                            display_size=display_size,
+                            prefix=new_prefix,
+                            is_last=is_last_item,
+                            first=False,
+                            indent_size=indent_size,
+                        )
+                    )
+
+            if remaining_count > 0:
+                more_message = f"{remaining_count} more item(s) not displayed."
+                result.append(
+                    f"{prefix}{'└' + ('─' * (indent_size - 2))} {more_message}"
+                )
+
+        return "\n".join(_ for _ in result if _)
+
     # ------------------------------------------------------------------------
     # Aliases
 
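The docstring example needs a live FTP server; the in-memory filesystem shows the output shape just as well:

```python
import fsspec

fs = fsspec.filesystem("memory")
fs.pipe_file("/proj/readme.md", b"hi")
fs.pipe_file("/proj/src/main.py", b"print('hi')")

print(fs.tree("/proj", display_size=True))
# /proj
# ├── src (1 file)
# │   └── main.py (11B)
# └── readme.md (2B)
```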
@@ -1733,7 +1875,7 @@ class AbstractBufferedFile(io.IOBase):
 
         self.kwargs = kwargs
 
-        if mode not in {"ab", "rb", "wb"}:
+        if mode not in {"ab", "rb", "wb", "xb"}:
             raise NotImplementedError("File mode not supported")
         if mode == "rb":
             if size is not None:
@@ -1799,7 +1941,7 @@ class AbstractBufferedFile(io.IOBase):
 
     def info(self):
         """File information about this path"""
-        if "r" in self.mode:
+        if self.readable():
             return self.details
         else:
             raise ValueError("Info not available while writing")
@@ -1846,7 +1988,7 @@ class AbstractBufferedFile(io.IOBase):
         data: bytes
             Set of bytes to be written.
         """
-        if self.mode not in {"wb", "ab"}:
+        if not self.writable():
             raise ValueError("File not in write mode")
         if self.closed:
             raise ValueError("I/O operation on closed file.")
@@ -1879,7 +2021,7 @@ class AbstractBufferedFile(io.IOBase):
         if force:
             self.forced = True
 
-        if self.mode not in {"wb", "ab"}:
+        if self.readable():
             # no-op to flush on read-mode
             return
 
@@ -1917,7 +2059,7 @@ class AbstractBufferedFile(io.IOBase):
 
     def _fetch_range(self, start, end):
         """Get the specified set of bytes from remote"""
-        raise NotImplementedError
+        return self.fs.cat_file(self.path, start=start, end=end)
 
     def read(self, length=-1):
         """
@@ -2028,21 +2170,22 @@ class AbstractBufferedFile(io.IOBase):
             return
         if self.closed:
             return
-        if self.mode == "rb":
-            self.cache = None
-        else:
-            if not self.forced:
-                self.flush(force=True)
-
-            if self.fs is not None:
-                self.fs.invalidate_cache(self.path)
-                self.fs.invalidate_cache(self.fs._parent(self.path))
+        try:
+            if self.mode == "rb":
+                self.cache = None
+            else:
+                if not self.forced:
+                    self.flush(force=True)
 
-        self.closed = True
+                if self.fs is not None:
+                    self.fs.invalidate_cache(self.path)
+                    self.fs.invalidate_cache(self.fs._parent(self.path))
+        finally:
+            self.closed = True
 
     def readable(self):
         """Whether opened for reading"""
-        return self.mode == "rb" and not self.closed
+        return "r" in self.mode and not self.closed
 
     def seekable(self):
         """Whether is seekable (only in read mode)"""
@@ -2050,7 +2193,23 @@
 
     def writable(self):
         """Whether opened for writing"""
-        return self.mode in {"wb", "ab"} and not self.closed
+        return self.mode in {"wb", "ab", "xb"} and not self.closed
+
+    def __reduce__(self):
+        if self.mode != "rb":
+            raise RuntimeError("Pickling a writeable file is not supported")
+
+        return reopen, (
+            self.fs,
+            self.path,
+            self.mode,
+            self.blocksize,
+            self.loc,
+            self.size,
+            self.autocommit,
+            self.cache.name if self.cache else "none",
+            self.kwargs,
+        )
 
     def __del__(self):
         if not self.closed:
@@ -2066,3 +2225,18 @@ class AbstractBufferedFile(io.IOBase):
 
     def __exit__(self, *args):
         self.close()
+
+
+def reopen(fs, path, mode, blocksize, loc, size, autocommit, cache_type, kwargs):
+    file = fs.open(
+        path,
+        mode=mode,
+        block_size=blocksize,
+        autocommit=autocommit,
+        cache_type=cache_type,
+        size=size,
+        **kwargs,
+    )
+    if loc > 0:
+        file.seek(loc)
+    return file
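
`__reduce__` plus the module-level `reopen` helper make read-mode buffered files picklable, preserving the read position. A sketch against the same public demo server used in the `tree` docstring, assuming it is reachable and that the backend's file class derives from `AbstractBufferedFile` (as `FTPFile` does):

```python
import pickle

import fsspec

fs = fsspec.filesystem("ftp", host="test.rebex.net", user="demo", password="password")

f = fs.open("/readme.txt", "rb")   # FTPFile is an AbstractBufferedFile
f.read(10)                         # advance the position to 10

g = pickle.loads(pickle.dumps(f))  # rebuilt via reopen(), then seeked to 10
assert g.loc == f.loc
```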
fsspec/tests/abstract/__init__.py CHANGED
@@ -6,6 +6,8 @@ import pytest
 from fsspec.implementations.local import LocalFileSystem
 from fsspec.tests.abstract.copy import AbstractCopyTests  # noqa: F401
 from fsspec.tests.abstract.get import AbstractGetTests  # noqa: F401
+from fsspec.tests.abstract.open import AbstractOpenTests  # noqa: F401
+from fsspec.tests.abstract.pipe import AbstractPipeTests  # noqa: F401
 from fsspec.tests.abstract.put import AbstractPutTests  # noqa: F401
 
 
@@ -225,7 +227,7 @@ class BaseAbstractFixtures:
         for i in range(10):
             hashed_i = md5(str(i).encode("utf-8")).hexdigest()
             path = some_join(source, f"{hashed_i}.txt")
-            some_fs.pipe(path=path, value=f"{i}".encode("utf-8"))
+            some_fs.pipe(path=path, value=f"{i}".encode())
         return source
 
 
fsspec/tests/abstract/open.py ADDED
@@ -0,0 +1,11 @@
+import pytest
+
+
+class AbstractOpenTests:
+    def test_open_exclusive(self, fs, fs_target):
+        with fs.open(fs_target, "wb") as f:
+            f.write(b"data")
+        with fs.open(fs_target, "rb") as f:
+            assert f.read() == b"data"
+        with pytest.raises(FileExistsError):
+            fs.open(fs_target, "xb")
fsspec/tests/abstract/pipe.py ADDED
@@ -0,0 +1,11 @@
+import pytest
+
+
+class AbstractPipeTests:
+    def test_pipe_exclusive(self, fs, fs_target):
+        fs.pipe_file(fs_target, b"data")
+        assert fs.cat_file(fs_target) == b"data"
+        with pytest.raises(FileExistsError):
+            fs.pipe_file(fs_target, b"data", mode="create")
+        fs.pipe_file(fs_target, b"new data", mode="overwrite")
+        assert fs.cat_file(fs_target) == b"new data"