fsspec 2024.3.0__py3-none-any.whl → 2024.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/__init__.py +2 -3
- fsspec/_version.py +14 -19
- fsspec/caching.py +83 -14
- fsspec/compression.py +1 -0
- fsspec/core.py +31 -6
- fsspec/exceptions.py +1 -0
- fsspec/generic.py +1 -1
- fsspec/gui.py +1 -1
- fsspec/implementations/arrow.py +0 -2
- fsspec/implementations/cache_mapper.py +1 -2
- fsspec/implementations/cache_metadata.py +7 -7
- fsspec/implementations/dirfs.py +2 -2
- fsspec/implementations/http.py +9 -9
- fsspec/implementations/local.py +97 -48
- fsspec/implementations/memory.py +9 -0
- fsspec/implementations/smb.py +3 -1
- fsspec/implementations/tests/__init__.py +0 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_file_listing.yaml +112 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_mkdir.yaml +582 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_pyarrow_non_partitioned.yaml +873 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range.yaml +458 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range_chunked.yaml +1355 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_and_read.yaml +795 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_pyarrow_non_partitioned.yaml +613 -0
- fsspec/implementations/tests/conftest.py +39 -0
- fsspec/implementations/tests/local/__init__.py +0 -0
- fsspec/implementations/tests/local/local_fixtures.py +18 -0
- fsspec/implementations/tests/local/local_test.py +14 -0
- fsspec/implementations/tests/memory/__init__.py +0 -0
- fsspec/implementations/tests/memory/memory_fixtures.py +27 -0
- fsspec/implementations/tests/memory/memory_test.py +14 -0
- fsspec/implementations/tests/out.zip +0 -0
- fsspec/implementations/tests/test_archive.py +382 -0
- fsspec/implementations/tests/test_arrow.py +259 -0
- fsspec/implementations/tests/test_cached.py +1306 -0
- fsspec/implementations/tests/test_common.py +35 -0
- fsspec/implementations/tests/test_dask.py +29 -0
- fsspec/implementations/tests/test_data.py +20 -0
- fsspec/implementations/tests/test_dbfs.py +268 -0
- fsspec/implementations/tests/test_dirfs.py +588 -0
- fsspec/implementations/tests/test_ftp.py +178 -0
- fsspec/implementations/tests/test_git.py +76 -0
- fsspec/implementations/tests/test_http.py +577 -0
- fsspec/implementations/tests/test_jupyter.py +57 -0
- fsspec/implementations/tests/test_libarchive.py +33 -0
- fsspec/implementations/tests/test_local.py +1285 -0
- fsspec/implementations/tests/test_memory.py +382 -0
- fsspec/implementations/tests/test_reference.py +720 -0
- fsspec/implementations/tests/test_sftp.py +233 -0
- fsspec/implementations/tests/test_smb.py +139 -0
- fsspec/implementations/tests/test_tar.py +243 -0
- fsspec/implementations/tests/test_webhdfs.py +197 -0
- fsspec/implementations/tests/test_zip.py +134 -0
- fsspec/implementations/webhdfs.py +1 -3
- fsspec/mapping.py +2 -2
- fsspec/parquet.py +0 -8
- fsspec/registry.py +4 -0
- fsspec/spec.py +21 -4
- fsspec/tests/__init__.py +0 -0
- fsspec/tests/abstract/mv.py +57 -0
- fsspec/tests/conftest.py +188 -0
- fsspec/tests/data/listing.html +1 -0
- fsspec/tests/test_api.py +498 -0
- fsspec/tests/test_async.py +230 -0
- fsspec/tests/test_caches.py +255 -0
- fsspec/tests/test_callbacks.py +89 -0
- fsspec/tests/test_compression.py +164 -0
- fsspec/tests/test_config.py +129 -0
- fsspec/tests/test_core.py +466 -0
- fsspec/tests/test_downstream.py +40 -0
- fsspec/tests/test_file.py +200 -0
- fsspec/tests/test_fuse.py +147 -0
- fsspec/tests/test_generic.py +90 -0
- fsspec/tests/test_gui.py +23 -0
- fsspec/tests/test_mapping.py +228 -0
- fsspec/tests/test_parquet.py +140 -0
- fsspec/tests/test_registry.py +134 -0
- fsspec/tests/test_spec.py +1167 -0
- fsspec/tests/test_utils.py +478 -0
- fsspec/utils.py +0 -2
- fsspec-2024.5.0.dist-info/METADATA +273 -0
- fsspec-2024.5.0.dist-info/RECORD +111 -0
- {fsspec-2024.3.0.dist-info → fsspec-2024.5.0.dist-info}/WHEEL +1 -2
- fsspec-2024.3.0.dist-info/METADATA +0 -167
- fsspec-2024.3.0.dist-info/RECORD +0 -54
- fsspec-2024.3.0.dist-info/top_level.txt +0 -1
- {fsspec-2024.3.0.dist-info → fsspec-2024.5.0.dist-info/licenses}/LICENSE +0 -0
fsspec/__init__.py
CHANGED
```diff
@@ -1,6 +1,7 @@
 from importlib.metadata import entry_points
 
-from . import _version, caching
+from . import caching
+from ._version import __version__  # noqa: F401
 from .callbacks import Callback
 from .compression import available_compressions
 from .core import get_fs_token_paths, open, open_files, open_local, url_to_fs
@@ -15,8 +16,6 @@ from .registry import (
 )
 from .spec import AbstractFileSystem
 
-__version__ = _version.get_versions()["version"]
-
 __all__ = [
     "AbstractFileSystem",
     "FSTimeoutError",
```
fsspec/_version.py
CHANGED
```diff
@@ -1,21 +1,16 @@
+# file generated by setuptools_scm
+# don't change, don't track in version control
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple, Union
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+else:
+    VERSION_TUPLE = object
 
-# This file was generated by 'versioneer.py' (0.29) from
-# revision-control system data, or from the parent directory name of an
-# unpacked source archive. Distribution tarballs contain a pre-generated copy
-# of this file.
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
 
-import json
-
-version_json = '''
-{
- "date": "2024-03-15T16:14:56-0400",
- "dirty": false,
- "error": null,
- "full-revisionid": "4bd16f64993af6092753d16df49298a19b58ce96",
- "version": "2024.3.0"
-}
-'''  # END VERSION_JSON
-
-
-def get_versions():
-    return json.loads(version_json)
+__version__ = version = '2024.5.0'
+__version_tuple__ = version_tuple = (2024, 5, 0)
```
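The version plumbing above swaps versioneer's embedded JSON blob for static attributes generated by setuptools_scm. A quick sanity check of what the new module exposes (a sketch, run against the 2024.5.0 wheel):

```python
# Sketch: the version is now a static string/tuple; versioneer's
# get_versions() no longer exists in fsspec._version.
import fsspec
from fsspec._version import __version_tuple__

print(fsspec.__version__)   # '2024.5.0' (re-exported in fsspec/__init__.py)
print(__version_tuple__)    # (2024, 5, 0)
```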
fsspec/caching.py
CHANGED
```diff
@@ -56,8 +56,13 @@ class BaseCache:
 
     def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
         self.blocksize = blocksize
+        self.nblocks = 0
         self.fetcher = fetcher
         self.size = size
+        self.hit_count = 0
+        self.miss_count = 0
+        # the bytes that we actually requested
+        self.total_requested_bytes = 0
 
     def _fetch(self, start: int | None, stop: int | None) -> bytes:
         if start is None:
@@ -68,6 +73,36 @@ class BaseCache:
             return b""
         return self.fetcher(start, stop)
 
+    def _reset_stats(self) -> None:
+        """Reset hit and miss counts for a more ganular report e.g. by file."""
+        self.hit_count = 0
+        self.miss_count = 0
+        self.total_requested_bytes = 0
+
+    def _log_stats(self) -> str:
+        """Return a formatted string of the cache statistics."""
+        if self.hit_count == 0 and self.miss_count == 0:
+            # a cache that does nothing, this is for logs only
+            return ""
+        return " , %s: %d hits, %d misses, %d total requested bytes" % (
+            self.name,
+            self.hit_count,
+            self.miss_count,
+            self.total_requested_bytes,
+        )
+
+    def __repr__(self) -> str:
+        # TODO: use rich for better formatting
+        return f"""
+        <{self.__class__.__name__}:
+            block size  :   {self.blocksize}
+            block count :   {self.nblocks}
+            file size   :   {self.size}
+            cache hits  :   {self.hit_count}
+            cache misses:   {self.miss_count}
+            total requested bytes: {self.total_requested_bytes}>
+        """
+
 
 class MMapCache(BaseCache):
     """memory-mapped sparse file cache
@@ -126,13 +161,18 @@ class MMapCache(BaseCache):
         start_block = start // self.blocksize
         end_block = end // self.blocksize
         need = [i for i in range(start_block, end_block + 1) if i not in self.blocks]
+        hits = [i for i in range(start_block, end_block + 1) if i in self.blocks]
+        self.miss_count += len(need)
+        self.hit_count += len(hits)
         while need:
             # TODO: not a for loop so we can consolidate blocks later to
             # make fewer fetch calls; this could be parallel
             i = need.pop(0)
+
             sstart = i * self.blocksize
             send = min(sstart + self.blocksize, self.size)
-
+            self.total_requested_bytes += send - sstart
+            logger.debug(f"MMap get block #{i} ({sstart}-{send})")
             self.cache[sstart:send] = self.fetcher(sstart, send)
             self.blocks.add(i)
 
@@ -176,16 +216,20 @@ class ReadAheadCache(BaseCache):
         l = end - start
         if start >= self.start and end <= self.end:
             # cache hit
+            self.hit_count += 1
             return self.cache[start - self.start : end - self.start]
         elif self.start <= start < self.end:
             # partial hit
+            self.miss_count += 1
             part = self.cache[start - self.start :]
             l -= len(part)
             start = self.end
         else:
             # miss
+            self.miss_count += 1
             part = b""
         end = min(self.size, end + self.blocksize)
+        self.total_requested_bytes += end - start
         self.cache = self.fetcher(start, end)  # new block replaces old
         self.start = start
         self.end = self.start + len(self.cache)
@@ -202,24 +246,39 @@ class FirstChunkCache(BaseCache):
     name = "first"
 
     def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
+        if blocksize > size:
+            # this will buffer the whole thing
+            blocksize = size
         super().__init__(blocksize, fetcher, size)
         self.cache: bytes | None = None
 
     def _fetch(self, start: int | None, end: int | None) -> bytes:
         start = start or 0
-        end = end or self.size
+        if start > self.size:
+            logger.debug("FirstChunkCache: requested start > file size")
+            return b""
+
+        end = min(end, self.size)
+
         if start < self.blocksize:
             if self.cache is None:
+                self.miss_count += 1
                 if end > self.blocksize:
+                    self.total_requested_bytes += end
                     data = self.fetcher(0, end)
                     self.cache = data[: self.blocksize]
                     return data[start:]
                 self.cache = self.fetcher(0, self.blocksize)
+                self.total_requested_bytes += self.blocksize
             part = self.cache[start:end]
             if end > self.blocksize:
+                self.total_requested_bytes += end - self.blocksize
                 part += self.fetcher(self.blocksize, end)
+            self.hit_count += 1
             return part
         else:
+            self.miss_count += 1
+            self.total_requested_bytes += end - start
             return self.fetcher(start, end)
 
 
@@ -256,12 +315,6 @@ class BlockCache(BaseCache):
         self.maxblocks = maxblocks
         self._fetch_block_cached = functools.lru_cache(maxblocks)(self._fetch_block)
 
-    def __repr__(self) -> str:
-        return (
-            f"<BlockCache blocksize={self.blocksize}, "
-            f"size={self.size}, nblocks={self.nblocks}>"
-        )
-
     def cache_info(self):
         """
         The statistics on the block cache.
@@ -319,6 +372,8 @@ class BlockCache(BaseCache):
 
         start = block_number * self.blocksize
         end = start + self.blocksize
+        self.total_requested_bytes += end - start
+        self.miss_count += 1
        logger.info("BlockCache fetching block %d", block_number)
         block_contents = super()._fetch(start, end)
         return block_contents
@@ -339,6 +394,7 @@
         start_pos = start % self.blocksize
         end_pos = end % self.blocksize
 
+        self.hit_count += 1
         if start_block_number == end_block_number:
             block: bytes = self._fetch_block_cached(start_block_number)
             return block[start_pos:end_pos]
@@ -404,6 +460,7 @@ class BytesCache(BaseCache):
         ):
             # cache hit: we have all the required data
             offset = start - self.start
+            self.hit_count += 1
             return self.cache[offset : offset + end - start]
 
         if self.blocksize:
@@ -418,17 +475,22 @@
             self.end is None or end > self.end
         ):
             # First read, or extending both before and after
+            self.total_requested_bytes += bend - start
+            self.miss_count += 1
             self.cache = self.fetcher(start, bend)
             self.start = start
         else:
             assert self.start is not None
             assert self.end is not None
+            self.miss_count += 1
 
             if start < self.start:
                 if self.end is None or self.end - end > self.blocksize:
+                    self.total_requested_bytes += bend - start
                     self.cache = self.fetcher(start, bend)
                     self.start = start
                 else:
+                    self.total_requested_bytes += self.start - start
                     new = self.fetcher(start, self.start)
                     self.start = start
                     self.cache = new + self.cache
@@ -436,9 +498,11 @@
             if self.end > self.size:
                 pass
             elif end - self.end > self.blocksize:
+                self.total_requested_bytes += bend - start
                 self.cache = self.fetcher(start, bend)
                 self.start = start
             else:
+                self.total_requested_bytes += bend - self.end
                 new = self.fetcher(self.end, bend)
                 self.cache = self.cache + new
 
@@ -470,10 +534,13 @@ class AllBytes(BaseCache):
     ) -> None:
         super().__init__(blocksize, fetcher, size)  # type: ignore[arg-type]
         if data is None:
+            self.miss_count += 1
+            self.total_requested_bytes += self.size
             data = self.fetcher(0, self.size)
         self.data = data
 
     def _fetch(self, start: int | None, stop: int | None) -> bytes:
+        self.hit_count += 1
         return self.data[start:stop]
 
 
@@ -551,6 +618,7 @@ class KnownPartsOfAFile(BaseCache):
                     # are allowed to pad reads beyond the
                     # buffer with zero
                     out += b"\x00" * (stop - start - len(out))
+                    self.hit_count += 1
                     return out
                 else:
                     # The request ends outside a known range,
@@ -572,6 +640,8 @@
                 f"IO/caching performance may be poor!"
             )
         logger.debug(f"KnownPartsOfAFile cache fetching {start}-{stop}")
+        self.total_requested_bytes += stop - start
+        self.miss_count += 1
         return out + super()._fetch(start, stop)
 
 
@@ -676,12 +746,6 @@ class BackgroundBlockCache(BaseCache):
         self._fetch_future: Future[bytes] | None = None
         self._fetch_future_lock = threading.Lock()
 
-    def __repr__(self) -> str:
-        return (
-            f"<BackgroundBlockCache blocksize={self.blocksize}, "
-            f"size={self.size}, nblocks={self.nblocks}>"
-        )
-
     def cache_info(self) -> UpdatableLRU.CacheInfo:
         """
         The statistics on the block cache.
@@ -799,6 +863,8 @@
         start = block_number * self.blocksize
         end = start + self.blocksize
         logger.info("BlockCache fetching block (%s) %d", log_info, block_number)
+        self.total_requested_bytes += end - start
+        self.miss_count += 1
         block_contents = super()._fetch(start, end)
         return block_contents
 
@@ -818,6 +884,9 @@
         start_pos = start % self.blocksize
         end_pos = end % self.blocksize
 
+        # kind of pointless to count this as a hit, but it is
+        self.hit_count += 1
+
         if start_block_number == end_block_number:
             block = self._fetch_block_cached(start_block_number)
             return block[start_pos:end_pos]
```
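The caching.py changes thread hit/miss/requested-byte counters through every cache class, add `_reset_stats`/`_log_stats`, and replace the per-class `__repr__` methods with a single one on `BaseCache`. A minimal sketch of the counters in action, using `ReadAheadCache` with a dummy in-memory fetcher (the data and block size here are illustrative):

```python
# Sketch: exercising the new BaseCache statistics with a fake "remote".
# _fetch is the internal API that fsspec file objects call; it is used
# directly here only to show the counters moving.
from fsspec.caching import ReadAheadCache

data = b"0123456789" * 10_000  # stand-in for remote file content

def fetcher(start: int, end: int) -> bytes:
    return data[start:end]

cache = ReadAheadCache(blocksize=4096, fetcher=fetcher, size=len(data))
cache._fetch(0, 1000)    # miss: nothing cached yet; reads ahead past the request
cache._fetch(100, 900)   # hit: fully inside the cached window
print(cache.hit_count, cache.miss_count, cache.total_requested_bytes)
print(cache)             # the new BaseCache.__repr__ reports the same stats
```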
fsspec/compression.py
CHANGED
fsspec/core.py
CHANGED
```diff
@@ -8,7 +8,7 @@ from glob import has_magic
 from pathlib import Path
 
 # for backwards compat, we export cache things from here too
-from .caching import (  # noqa: F401
+from fsspec.caching import (  # noqa: F401
     BaseCache,
     BlockCache,
     BytesCache,
@@ -16,9 +16,10 @@ from .caching import (  # noqa: F401
     ReadAheadCache,
     caches,
 )
-from .compression import compr
-from .registry import filesystem, get_filesystem_class
-from .utils import (
+from fsspec.compression import compr
+from fsspec.config import conf
+from fsspec.registry import filesystem, get_filesystem_class
+from fsspec.utils import (
     _unstrip_protocol,
     build_name_function,
     infer_compression,
@@ -100,7 +101,18 @@ class OpenFile:
     def __enter__(self):
         mode = self.mode.replace("t", "").replace("b", "") + "b"
 
-        f = self.fs.open(self.path, mode=mode)
+        try:
+            f = self.fs.open(self.path, mode=mode)
+        except FileNotFoundError as e:
+            if has_magic(self.path):
+                raise FileNotFoundError(
+                    "%s not found. The URL contains glob characters: you maybe needed\n"
+                    "to pass expand=True in fsspec.open() or the storage_options of \n"
+                    "your library. You can also set the config value 'open_expand'\n"
+                    "before import, or fsspec.core.DEFAULT_EXPAND at runtime, to True.",
+                    self.path,
+                ) from e
+            raise
 
         self.fobjects = [f]
 
@@ -367,6 +379,7 @@ def url_to_fs(url, **kwargs):
     urlpath : str
         The file-systems-specific URL for ``url``.
     """
+    url = stringify_path(url)
     # non-FS arguments that appear in fsspec.open()
     # inspect could keep this in sync with open()'s signature
     known_kwargs = {
@@ -396,6 +409,9 @@
     return fs, urlpath
 
 
+DEFAULT_EXPAND = conf.get("open_expand", False)
+
+
 def open(
     urlpath,
     mode="rb",
@@ -404,6 +420,7 @@ def open(
     errors=None,
     protocol=None,
     newline=None,
+    expand=None,
     **kwargs,
 ):
     """Given a path or paths, return one ``OpenFile`` object.
@@ -428,6 +445,13 @@
     newline: bytes or None
         Used for line terminator in text mode. If None, uses system default;
         if blank, uses no translation.
+    expand: bool or Nonw
+        Whether to regard file paths containing special glob characters as needing
+        expansion (finding the first match) or absolute. Setting False allows using
+        paths which do embed such characters. If None (default), this argument
+        takes its value from the DEFAULT_EXPAND module variable, which takes
+        its initial value from the "open_expand" config value at startup, which will
+        be False if not set.
     **kwargs: dict
         Extra options that make sense to a particular storage connection, e.g.
         host, port, username, password, etc.
@@ -456,6 +480,7 @@
     - For implementations in separate packages see
       https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
     """
+    expand = DEFAULT_EXPAND if expand is None else expand
     out = open_files(
         urlpath=[urlpath],
         mode=mode,
@@ -464,7 +489,7 @@
         errors=errors,
         protocol=protocol,
         newline=newline,
-        expand=True,
+        expand=expand,
         **kwargs,
     )
     if not out:
```
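The core.py hunks make glob expansion in `fsspec.open` explicit (`expand=`, `DEFAULT_EXPAND`, the "open_expand" config) and add a pointed error when a literal path containing glob characters is missing. A sketch of the new failure path (the memory:// URL is illustrative):

```python
# Sketch: with expansion off (the default), a missing path that contains
# "*" now raises the explanatory FileNotFoundError from OpenFile.__enter__.
import fsspec

try:
    with fsspec.open("memory://data/*.csv", "rb") as f:
        f.read()
except FileNotFoundError as e:
    print(e)  # suggests expand=True, the "open_expand" config, or DEFAULT_EXPAND
```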
fsspec/exceptions.py
CHANGED
fsspec/generic.py
CHANGED
```diff
@@ -139,7 +139,7 @@ def rsync(
         source_files, target_files = zip(*allfiles.items())
         fs.cp(source_files, target_files, **kwargs)
     logger.debug(f"{len(to_delete)} files to delete")
-    if delete_missing:
+    if delete_missing and to_delete:
         fs.rm(to_delete)
     return allfiles
 
```
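The guard means `rsync` only calls `fs.rm` when there is actually something to delete. A sketch with in-memory paths (illustrative):

```python
# Sketch: delete_missing=True with nothing stale no longer triggers
# fs.rm([]) on an empty list.
import fsspec
from fsspec.generic import rsync

mem = fsspec.filesystem("memory")
mem.pipe("/src/a.txt", b"hello")
rsync("memory://src", "memory://dst", delete_missing=True)
print(mem.cat("/dst/a.txt"))  # b'hello'; the empty delete list was skipped
```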
fsspec/gui.py
CHANGED
```diff
@@ -94,7 +94,7 @@ class SigSlot:
         try:
             return self.panel._repr_mimebundle_(*args, **kwargs)
         except (ValueError, AttributeError):
-            raise NotImplementedError("Panel does not seem to be set up")
+            raise NotImplementedError("Panel does not seem to be set up properly")
 
     def connect(self, signal, slot):
         """Associate call back with given event
```
fsspec/implementations/arrow.py
CHANGED
fsspec/implementations/cache_mapper.py
CHANGED

```diff
@@ -12,8 +12,7 @@ class AbstractCacheMapper(abc.ABC):
     """
 
     @abc.abstractmethod
-    def __call__(self, path: str) -> str:
-        ...
+    def __call__(self, path: str) -> str: ...
 
     def __eq__(self, other: object) -> bool:
         # Identity only depends on class. When derived classes have attributes
```

fsspec/implementations/cache_metadata.py
CHANGED

```diff
@@ -57,10 +57,14 @@ class CacheMetadata:
         """Low-level function to load metadata from specific file"""
         try:
             with open(fn, "r") as f:
-                return json.load(f)
+                loaded = json.load(f)
         except ValueError:
             with open(fn, "rb") as f:
-                return pickle.load(f)
+                loaded = pickle.load(f)
+        for c in loaded.values():
+            if isinstance(c.get("blocks"), list):
+                c["blocks"] = set(c["blocks"])
+        return loaded
 
     def _save(self, metadata_to_save: Detail, fn: str) -> None:
         """Low-level function to save metadata to specific file"""
@@ -152,11 +156,7 @@ class CacheMetadata:
         for fn, _, _ in self._scan_locations():
             if os.path.exists(fn):
                 # TODO: consolidate blocks here
-                loaded_cached_files = self._load(fn)
-                for c in loaded_cached_files.values():
-                    if isinstance(c["blocks"], list):
-                        c["blocks"] = set(c["blocks"])
-                cached_files.append(loaded_cached_files)
+                cached_files.append(self._load(fn))
             else:
                 cached_files.append({})
         self.cached_files = cached_files or [{}]
```
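With the conversion moved into `_load`, every loader now returns "blocks" as a set rather than the list that JSON round-trips produce. The transformation itself is small (a sketch; the metadata entry is made up):

```python
# Sketch of the conversion _load now applies after json/pickle loading:
# JSON can only store the cached-block set as a list, so it is restored
# to a set (the type the block-checking logic expects) on read.
import json

detail = json.loads('{"abc123": {"blocks": [0, 1, 2]}}')  # made-up entry
for c in detail.values():
    if isinstance(c.get("blocks"), list):
        c["blocks"] = set(c["blocks"])
print(detail["abc123"]["blocks"])  # {0, 1, 2}
```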
fsspec/implementations/dirfs.py
CHANGED
```diff
@@ -329,8 +329,8 @@ class DirFileSystem(AsyncFileSystem):
     def rmdir(self, path):
         return self.fs.rmdir(self._join(path))
 
-    def mv_file(self, path1, path2, **kwargs):
-        return self.fs.mv_file(
+    def mv(self, path1, path2, **kwargs):
+        return self.fs.mv(
             self._join(path1),
             self._join(path2),
             **kwargs,
```
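The rename lines DirFileSystem up with the `mv` name used by `AbstractFileSystem`. A sketch of the delegation (in-memory filesystem, illustrative paths):

```python
# Sketch: DirFileSystem.mv forwards to the wrapped filesystem with the
# directory prefix joined on.
import fsspec

mem = fsspec.filesystem("memory")
mem.pipe("/root/a.txt", b"x")

dfs = fsspec.filesystem("dir", path="/root", fs=mem)
dfs.mv("a.txt", "b.txt")  # becomes mem.mv("/root/a.txt", "/root/b.txt")
print(mem.ls("/root", detail=False))
```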
fsspec/implementations/http.py
CHANGED
```diff
@@ -451,7 +451,7 @@ class HTTPFileSystem(AsyncFileSystem):
 
         ends_with_slash = path.endswith("/")  # _strip_protocol strips trailing slash
         path = self._strip_protocol(path)
-        append_slash_to_dirname = ends_with_slash or path.endswith("/**")
+        append_slash_to_dirname = ends_with_slash or path.endswith(("/**", "/*"))
         idx_star = path.find("*") if path.find("*") >= 0 else len(path)
         idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
 
@@ -494,15 +494,15 @@ class HTTPFileSystem(AsyncFileSystem):
         pattern = re.compile(pattern)
 
         out = {
-            p: info
+            (
+                p.rstrip("/")
+                if not append_slash_to_dirname
+                and info["type"] == "directory"
+                and p.endswith("/")
+                else p
+            ): info
             for p, info in sorted(allpaths.items())
-            if pattern.match(
-                (
-                    p + "/"
-                    if append_slash_to_dirname and info["type"] == "directory"
-                    else p
-                )
-            )
+            if pattern.match(p.rstrip("/"))
         }
 
         if detail:
```