fsspec 2024.12.0__py3-none-any.whl → 2025.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fsspec/implementations/local.py CHANGED
@@ -57,20 +57,21 @@ class LocalFileSystem(AbstractFileSystem):
57
57
 
58
58
  def ls(self, path, detail=False, **kwargs):
59
59
  path = self._strip_protocol(path)
60
- info = self.info(path)
61
- if info["type"] == "directory":
60
+ path_info = self.info(path)
61
+ infos = []
62
+ if path_info["type"] == "directory":
62
63
  with os.scandir(path) as it:
63
- infos = []
64
64
  for f in it:
65
65
  try:
66
- infos.append(self.info(f))
66
+ # Only get the info if requested since it is a bit expensive (the stat call inside)
67
+ # The strip_protocol is also used in info() and calls make_path_posix to always return posix paths
68
+ info = self.info(f) if detail else self._strip_protocol(f.path)
69
+ infos.append(info)
67
70
  except FileNotFoundError:
68
71
  pass
69
72
  else:
70
- infos = [info]
73
+ infos = [path_info] if detail else [path_info["name"]]
71
74
 
72
- if not detail:
73
- return [i["name"] for i in infos]
74
75
  return infos
75
76
 
76
77
  def info(self, path, **kwargs):
fsspec/implementations/reference.py CHANGED
@@ -10,6 +10,7 @@ from itertools import chain
10
10
  from typing import TYPE_CHECKING, Literal
11
11
 
12
12
  import fsspec.core
13
+ from fsspec.spec import AbstractBufferedFile
13
14
 
14
15
  try:
15
16
  import ujson as json
@@ -139,13 +140,13 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
139
140
 
140
141
  self.root = root
141
142
  self.chunk_sizes = {}
142
- self.out_root = out_root or self.root
143
143
  self.cat_thresh = categorical_threshold
144
144
  self.engine = engine
145
145
  self.cache_size = cache_size
146
146
  self.url = self.root + "/{field}/refs.{record}.parq"
147
147
  # TODO: derive fs from `root`
148
148
  self.fs = fsspec.filesystem("file") if fs is None else fs
149
+ self.out_root = self.fs.unstrip_protocol(out_root or self.root)
149
150
 
150
151
  from importlib.util import find_spec
151
152
 
@@ -394,10 +395,14 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
394
395
  self.write(field, record)
395
396
  else:
396
397
  # metadata or top-level
397
- self._items[key] = value
398
- new_value = json.loads(
399
- value.decode() if isinstance(value, bytes) else value
400
- )
398
+ if hasattr(value, "to_bytes"):
399
+ val = value.to_bytes().decode()
400
+ elif isinstance(value, bytes):
401
+ val = value.decode()
402
+ else:
403
+ val = value
404
+ self._items[key] = val
405
+ new_value = json.loads(val)
401
406
  self.zmetadata[key] = {**self.zmetadata.get(key, {}), **new_value}
402
407
 
403
408
  @staticmethod
@@ -493,7 +498,6 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
493
498
  }
494
499
  else:
495
500
  raise NotImplementedError(f"{self.engine} not supported")
496
-
497
501
  df.to_parquet(
498
502
  fn,
499
503
  engine=self.engine,
@@ -595,8 +599,7 @@ class ReferenceFileSystem(AsyncFileSystem):
595
599
  async, and must allow start and end args in _cat_file. Later versions
596
600
  may allow multiple arbitrary URLs for the targets.
597
601
  This FileSystem is read-only. It is designed to be used with async
598
- targets (for now). This FileSystem only allows whole-file access, no
599
- ``open``. We do not get original file details from the target FS.
602
+ targets (for now). We do not get original file details from the target FS.
600
603
  Configuration is by passing a dict of references at init, or a URL to
601
604
  a JSON file containing the same; this dict
602
605
  can also contain concrete data for some set of paths.
@@ -606,6 +609,7 @@ class ReferenceFileSystem(AsyncFileSystem):
606
609
  """
607
610
 
608
611
  protocol = "reference"
612
+ cachable = False
609
613
 
610
614
  def __init__(
611
615
  self,
@@ -761,7 +765,12 @@ class ReferenceFileSystem(AsyncFileSystem):
761
765
  # Wrap any non-async filesystems to ensure async methods are available below
762
766
  for k, f in self.fss.items():
763
767
  if not f.async_impl:
764
- self.fss[k] = AsyncFileSystemWrapper(f)
768
+ self.fss[k] = AsyncFileSystemWrapper(f, asynchronous=self.asynchronous)
769
+ elif self.asynchronous ^ f.asynchronous:
770
+ raise ValueError(
771
+ "Reference-FS's target filesystem must have same value"
772
+ "of asynchronous"
773
+ )
765
774
 
766
775
  def _cat_common(self, path, start=None, end=None):
767
776
  path = self._strip_protocol(path)
@@ -772,6 +781,8 @@ class ReferenceFileSystem(AsyncFileSystem):
772
781
  raise FileNotFoundError(path) from exc
773
782
  if isinstance(part, str):
774
783
  part = part.encode()
784
+ if hasattr(part, "to_bytes"):
785
+ part = part.to_bytes()
775
786
  if isinstance(part, bytes):
776
787
  logger.debug(f"Reference: {path}, type bytes")
777
788
  if part.startswith(b"base64:"):
@@ -1073,7 +1084,7 @@ class ReferenceFileSystem(AsyncFileSystem):
1073
1084
  self.dircache = {"": []}
1074
1085
  it = self.references.items()
1075
1086
  for path, part in it:
1076
- if isinstance(part, (bytes, str)):
1087
+ if isinstance(part, (bytes, str)) or hasattr(part, "to_bytes"):
1077
1088
  size = len(part)
1078
1089
  elif len(part) == 1:
1079
1090
  size = None
@@ -1100,10 +1111,33 @@ class ReferenceFileSystem(AsyncFileSystem):
1100
1111
  self.dircache[par].append({"name": path, "type": "file", "size": size})
1101
1112
 
1102
1113
  def _open(self, path, mode="rb", block_size=None, cache_options=None, **kwargs):
1103
- data = self.cat_file(path) # load whole chunk into memory
1104
- return io.BytesIO(data)
1114
+ part_or_url, start0, end0 = self._cat_common(path)
1115
+ # This logic is kept outside `ReferenceFile` to avoid unnecessary redirection.
1116
+ # That does mean `_cat_common` gets called twice if it eventually reaches `ReferenceFile`.
1117
+ if isinstance(part_or_url, bytes):
1118
+ return io.BytesIO(part_or_url[start0:end0])
1119
+
1120
+ protocol, _ = split_protocol(part_or_url)
1121
+ if start0 is None and end0 is None:
1122
+ return self.fss[protocol]._open(
1123
+ part_or_url,
1124
+ mode,
1125
+ block_size=block_size,
1126
+ cache_options=cache_options,
1127
+ **kwargs,
1128
+ )
1129
+
1130
+ return ReferenceFile(
1131
+ self,
1132
+ path,
1133
+ mode,
1134
+ block_size=block_size,
1135
+ cache_options=cache_options,
1136
+ **kwargs,
1137
+ )
1105
1138
 
1106
1139
  def ls(self, path, detail=True, **kwargs):
1140
+ logger.debug("list %s", path)
1107
1141
  path = self._strip_protocol(path)
1108
1142
  if isinstance(self.references, LazyReferenceMapper):
1109
1143
  try:
@@ -1214,3 +1248,58 @@ class ReferenceFileSystem(AsyncFileSystem):
1214
1248
  out[k] = v
1215
1249
  with fsspec.open(url, "wb", **storage_options) as f:
1216
1250
  f.write(json.dumps({"version": 1, "refs": out}).encode())
1251
+
1252
+
1253
+ class ReferenceFile(AbstractBufferedFile):
1254
+ def __init__(
1255
+ self,
1256
+ fs,
1257
+ path,
1258
+ mode="rb",
1259
+ block_size="default",
1260
+ autocommit=True,
1261
+ cache_type="readahead",
1262
+ cache_options=None,
1263
+ size=None,
1264
+ **kwargs,
1265
+ ):
1266
+ super().__init__(
1267
+ fs,
1268
+ path,
1269
+ mode=mode,
1270
+ block_size=block_size,
1271
+ autocommit=autocommit,
1272
+ size=size,
1273
+ cache_type=cache_type,
1274
+ cache_options=cache_options,
1275
+ **kwargs,
1276
+ )
1277
+ part_or_url, self.start, self.end = self.fs._cat_common(self.path)
1278
+ protocol, _ = split_protocol(part_or_url)
1279
+ self.src_fs = self.fs.fss[protocol]
1280
+ self.src_path = part_or_url
1281
+ self._f = None
1282
+
1283
+ @property
1284
+ def f(self):
1285
+ if self._f is None or self._f.closed:
1286
+ self._f = self.src_fs._open(
1287
+ self.src_path,
1288
+ mode=self.mode,
1289
+ block_size=self.blocksize,
1290
+ autocommit=self.autocommit,
1291
+ cache_type="none",
1292
+ **self.kwargs,
1293
+ )
1294
+ return self._f
1295
+
1296
+ def close(self):
1297
+ if self._f is not None:
1298
+ self._f.close()
1299
+ return super().close()
1300
+
1301
+ def _fetch_range(self, start, end):
1302
+ start = start + self.start
1303
+ end = min(end + self.start, self.end)
1304
+ self.f.seek(start)
1305
+ return self.f.read(end - start)
fsspec/registry.py CHANGED
@@ -72,6 +72,9 @@ known_implementations = {
72
72
  "class": "fsspec.implementations.arrow.HadoopFileSystem",
73
73
  "err": "pyarrow and local java libraries required for HDFS",
74
74
  },
75
+ "async_wrapper": {
76
+ "class": "morefs.asyn_wrapper.AsyncWrapperFileSystem",
77
+ },
75
78
  "asynclocal": {
76
79
  "class": "morefs.asyn_local.AsyncLocalFileSystem",
77
80
  "err": "Install 'morefs[asynclocalfs]' to use AsyncLocalFileSystem",
@@ -218,9 +221,9 @@ known_implementations = {
218
221
  "zip": {"class": "fsspec.implementations.zip.ZipFileSystem"},
219
222
  }
220
223
 
221
- assert list(known_implementations) == sorted(
222
- known_implementations
223
- ), "Not in alphabetical order"
224
+ assert list(known_implementations) == sorted(known_implementations), (
225
+ "Not in alphabetical order"
226
+ )
224
227
 
225
228
 
226
229
  def get_filesystem_class(protocol):
fsspec/spec.py CHANGED
@@ -382,7 +382,7 @@ class AbstractFileSystem(metaclass=_Cached):
382
382
  pass
383
383
 
384
384
  def walk(self, path, maxdepth=None, topdown=True, on_error="omit", **kwargs):
385
- """Return all files belows path
385
+ """Return all files under the given path.
386
386
 
387
387
  List all files, recursing into subdirectories; output is iterator-style,
388
388
  like ``os.walk()``. For a simple list of files, ``find()`` is available.
@@ -2131,7 +2131,7 @@ class AbstractBufferedFile(io.IOBase):
2131
2131
  return b"".join(out)
2132
2132
 
2133
2133
  def readline(self):
2134
- """Read until first occurrence of newline character
2134
+ """Read until and including the first occurrence of newline character
2135
2135
 
2136
2136
  Note that, because of character encoding, this is not necessarily a
2137
2137
  true line ending.
@@ -2148,7 +2148,7 @@ class AbstractBufferedFile(io.IOBase):
2148
2148
  return self
2149
2149
 
2150
2150
  def readlines(self):
2151
- """Return all data, split by the newline character"""
2151
+ """Return all data, split by the newline character, including the newline character"""
2152
2152
  data = self.read()
2153
2153
  lines = data.split(b"\n")
2154
2154
  out = [l + b"\n" for l in lines[:-1]]
fsspec-2025.3.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fsspec
3
- Version: 2024.12.0
3
+ Version: 2025.3.0
4
4
  Summary: File-system specification
5
5
  Project-URL: Changelog, https://filesystem-spec.readthedocs.io/en/latest/changelog.html
6
6
  Project-URL: Documentation, https://filesystem-spec.readthedocs.io/en/latest/
@@ -131,7 +131,6 @@ Requires-Dist: pytest-rerunfailures; extra == 'test'
131
131
  Requires-Dist: requests; extra == 'test'
132
132
  Provides-Extra: test-downstream
133
133
  Requires-Dist: aiobotocore<3.0.0,>=2.5.4; extra == 'test-downstream'
134
- Requires-Dist: dask-expr; extra == 'test-downstream'
135
134
  Requires-Dist: dask[dataframe,test]; extra == 'test-downstream'
136
135
  Requires-Dist: moto[server]<5,>4; extra == 'test-downstream'
137
136
  Requires-Dist: pytest-timeout; extra == 'test-downstream'
fsspec-2025.3.0.dist-info/RECORD CHANGED
@@ -1,13 +1,13 @@
1
1
  fsspec/__init__.py,sha256=l9MJaNNV2d4wKpCtMvXDr55n92DkdrAayGy3F9ICjzk,1998
2
- fsspec/_version.py,sha256=wQ2VhCCZZrkDgAic2RGrr4PbmFQdFL4PTIWVnK3r9tM,419
3
- fsspec/archive.py,sha256=S__DzfZj-urAN3tp2W6jJ6YDiXG1fAl7FjvWUN73qIE,2386
2
+ fsspec/_version.py,sha256=mWdw-j-kCQGfmn6901GQSyjFXi2pK3h-NrKTuLRVqCw,517
3
+ fsspec/archive.py,sha256=vM6t_lgV6lBWbBYwpm3S4ofBQFQxUPr5KkDQrrQcQro,2411
4
4
  fsspec/asyn.py,sha256=rsnCsFUmBZmKJqg9m-IDWInoQtE4wV0rGDZEXZwuU3c,36500
5
5
  fsspec/caching.py,sha256=oHVy9zpy4Oqk5f1t3-Q31bbw0tsmfddGGKLJs__OdKA,32790
6
6
  fsspec/callbacks.py,sha256=BDIwLzK6rr_0V5ch557fSzsivCElpdqhXr5dZ9Te-EE,9210
7
7
  fsspec/compression.py,sha256=jCSUMJu-zSNyrusnHT0wKXgOd1tTJR6vM126i5SR5Zc,4865
8
8
  fsspec/config.py,sha256=LF4Zmu1vhJW7Je9Q-cwkRc3xP7Rhyy7Xnwj26Z6sv2g,4279
9
9
  fsspec/conftest.py,sha256=fVfx-NLrH_OZS1TIpYNoPzM7efEcMoL62reHOdYeFCA,1245
10
- fsspec/core.py,sha256=bn-y3Mn9q8Gh3Ng_yAIDfIjyysQ95tuK78RlhlrqTb4,23828
10
+ fsspec/core.py,sha256=1tLctwr7sF1VO3djc_UkjhJ8IAEy0TUMH_bb07Sw17E,23828
11
11
  fsspec/dircache.py,sha256=YzogWJrhEastHU7vWz-cJiJ7sdtLXFXhEpInGKd4EcM,2717
12
12
  fsspec/exceptions.py,sha256=pauSLDMxzTJMOjvX1WEUK0cMyFkrFxpWJsyFywav7A8,331
13
13
  fsspec/fuse.py,sha256=Q-3NOOyLqBfYa4Db5E19z_ZY36zzYHtIs1mOUasItBQ,10177
@@ -16,29 +16,30 @@ fsspec/gui.py,sha256=xBnHL2-r0LVwhDAtnHoPpXts7jd4Z32peawCJiI-7lI,13975
16
16
  fsspec/json.py,sha256=65sQ0Y7mTj33u_Y4IId5up4abQ3bAel4E4QzbKMiQSg,3826
17
17
  fsspec/mapping.py,sha256=m2ndB_gtRBXYmNJg0Ie1-BVR75TFleHmIQBzC-yWhjU,8343
18
18
  fsspec/parquet.py,sha256=6ibAmG527L5JNFS0VO8BDNlxHdA3bVYqdByeiFgpUVM,19448
19
- fsspec/registry.py,sha256=A2r3PiZd17192sGHLwWNFbK8RFiDA7gSbfboIJ07wTY,11471
20
- fsspec/spec.py,sha256=d_NY5YVuwV7YCRduKkaR_z8B9GUna4-H9mOinymEMFY,75971
19
+ fsspec/registry.py,sha256=Bbur6KJilN62hx_lSRgCx9HlKrv91pLpdEPG7Vzme1M,11566
20
+ fsspec/spec.py,sha256=l7ZEbgLsnrFuS-yrGl9re6ia1Yts1_10RqGV_mT-5P8,76032
21
21
  fsspec/transaction.py,sha256=xliRG6U2Zf3khG4xcw9WiB-yAoqJSHEGK_VjHOdtgo0,2398
22
22
  fsspec/utils.py,sha256=A11t25RnpiQ30RO6xeR0Qqlu3fGj8bnc40jg08tlYSI,22980
23
23
  fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  fsspec/implementations/arrow.py,sha256=721Dikne_lV_0tlgk9jyKmHL6W-5MT0h2LKGvOYQTPI,8623
25
- fsspec/implementations/asyn_wrapper.py,sha256=cXfSkF2AaboInIIA_6jmB796RP_BXd8u08loPAHQsxQ,2864
25
+ fsspec/implementations/asyn_wrapper.py,sha256=PNkYdHiLVWwk-GJok5O6dTnhPwDaSU9QTtBTE9CIRec,3082
26
26
  fsspec/implementations/cache_mapper.py,sha256=W4wlxyPxZbSp9ItJ0pYRVBMh6bw9eFypgP6kUYuuiI4,2421
27
27
  fsspec/implementations/cache_metadata.py,sha256=pcOJYcBQY5OaC7Yhw0F3wjg08QLYApGmoISCrbs59ks,8511
28
28
  fsspec/implementations/cached.py,sha256=KA6c4jqrGeeg8WNPLsh8FkL3KeRAQtGLzKw18vSF1CI,32820
29
29
  fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
30
30
  fsspec/implementations/data.py,sha256=LDLczxRh8h7x39Zjrd-GgzdQHr78yYxDlrv2C9Uxb5E,1658
31
- fsspec/implementations/dbfs.py,sha256=a0eNjLxyfFK7pbEa52U8K-PhNHukzdGVx1eLcVniaXY,15092
32
- fsspec/implementations/dirfs.py,sha256=ymakitNNQ07tW76EShyw3rC9RvIDHl4gtuOhE_h1vUg,12032
31
+ fsspec/implementations/dbfs.py,sha256=XwpotuS_ncz3XK1dkUteww9GnTja7HoY91c0m4GUfwI,15092
32
+ fsspec/implementations/dirfs.py,sha256=f1sGnQ9Vf0xTxrXo4jDeBy4Qfq3RTqAEemqBSeb0hwY,12108
33
33
  fsspec/implementations/ftp.py,sha256=sorsczLp_2J3ukONsbZY-11sRZP6H5a3V7XXf6o6ip0,11936
34
34
  fsspec/implementations/git.py,sha256=4SElW9U5d3k3_ITlvUAx59Yk7XLNRTqkGa2C3hCUkWM,3754
35
35
  fsspec/implementations/github.py,sha256=eAn1kJ7VeWR6gVoVRLBYclF_rQDXSJU-xzMXpvPQWqs,8002
36
- fsspec/implementations/http.py,sha256=d7G7_pRTMHouKE42lvRNHqB5u4XQi0dm4wb-6U_IiF4,29361
36
+ fsspec/implementations/http.py,sha256=_gLt0yGbVOYWvE9pK81WCC-3TgbOMOKJYllBU72ALo8,30138
37
+ fsspec/implementations/http_sync.py,sha256=vHf2_O9RRlaW6k-R4hsS-91BkqyPIlc0zwo_ENNGU4U,30147
37
38
  fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ibN4Pg,3811
38
39
  fsspec/implementations/libarchive.py,sha256=5_I2DiLXwQ1JC8x-K7jXu-tBwhO9dj7tFLnb0bTnVMQ,7102
39
- fsspec/implementations/local.py,sha256=YvR9b2MndSQIHszAMUkFvN65eWVbIfoGJJjAeS43ZS4,15259
40
+ fsspec/implementations/local.py,sha256=g2iK8uWPGkSiI6bwmnIRXhJMQvTegCmXZ8Kb8ojhvAo,15543
40
41
  fsspec/implementations/memory.py,sha256=cLNrK9wk97sl4Tre9uVDXWj6mEHvvVVIgaVgNA5KVIg,10527
41
- fsspec/implementations/reference.py,sha256=E-XYtnsHpE1e4x2io1ILOioGsWpCLDH1bqYN3QPGUJI,45930
42
+ fsspec/implementations/reference.py,sha256=t23prs_5ugXJnYhLxLlPLPyagrx4_ofZWR_oyX9wd3Q,48703
42
43
  fsspec/implementations/sftp.py,sha256=fMY9XZcmpjszQ2tCqO_TPaJesaeD_Dv7ptYzgUPGoO0,5631
43
44
  fsspec/implementations/smb.py,sha256=5fhu8h06nOLBPh2c48aT7WBRqh9cEcbIwtyu06wTjec,15236
44
45
  fsspec/implementations/tar.py,sha256=dam78Tp_CozybNqCY2JYgGBS3Uc9FuJUAT9oB0lolOs,4111
@@ -52,7 +53,7 @@ fsspec/tests/abstract/mv.py,sha256=k8eUEBIrRrGMsBY5OOaDXdGnQUKGwDIfQyduB6YD3Ns,1
52
53
  fsspec/tests/abstract/open.py,sha256=Fi2PBPYLbRqysF8cFm0rwnB41kMdQVYjq8cGyDXp3BU,329
53
54
  fsspec/tests/abstract/pipe.py,sha256=LFzIrLCB5GLXf9rzFKJmE8AdG7LQ_h4bJo70r8FLPqM,402
54
55
  fsspec/tests/abstract/put.py,sha256=7aih17OKB_IZZh1Mkq1eBDIjobhtMQmI8x-Pw-S_aZk,21201
55
- fsspec-2024.12.0.dist-info/METADATA,sha256=bZRSxMVT7oHOUheJQEDMsChBFuby1pTg8WUbnLYATrc,11801
56
- fsspec-2024.12.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
57
- fsspec-2024.12.0.dist-info/licenses/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
58
- fsspec-2024.12.0.dist-info/RECORD,,
56
+ fsspec-2025.3.0.dist-info/METADATA,sha256=L2OjjikjNWpjvp8i5_ec515Cd09eUi8qZ_3peQmWG8M,11747
57
+ fsspec-2025.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
58
+ fsspec-2025.3.0.dist-info/licenses/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
59
+ fsspec-2025.3.0.dist-info/RECORD,,