fsspec 2023.10.0__tar.gz → 2023.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fsspec-2023.10.0/fsspec.egg-info → fsspec-2023.12.0}/PKG-INFO +1 -1
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/_version.py +3 -3
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/asyn.py +34 -44
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/caching.py +1 -1
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/core.py +16 -3
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/generic.py +4 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/gui.py +1 -1
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/cached.py +70 -14
- fsspec-2023.12.0/fsspec/implementations/data.py +48 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/http.py +28 -41
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/memory.py +1 -1
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/reference.py +49 -16
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/smb.py +0 -1
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/webhdfs.py +26 -4
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/registry.py +2 -1
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/spec.py +36 -48
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/tests/abstract/common.py +5 -5
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/transaction.py +8 -4
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/utils.py +114 -1
- {fsspec-2023.10.0 → fsspec-2023.12.0/fsspec.egg-info}/PKG-INFO +1 -1
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec.egg-info/SOURCES.txt +1 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/pyproject.toml +1 -1
- {fsspec-2023.10.0 → fsspec-2023.12.0}/setup.cfg +2 -2
- {fsspec-2023.10.0 → fsspec-2023.12.0}/LICENSE +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/MANIFEST.in +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/README.md +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/__init__.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/archive.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/callbacks.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/compression.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/config.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/conftest.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/dircache.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/exceptions.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/fuse.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/__init__.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/arrow.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/cache_mapper.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/cache_metadata.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/dask.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/dbfs.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/dirfs.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/ftp.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/git.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/github.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/jupyter.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/libarchive.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/local.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/sftp.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/tar.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/implementations/zip.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/mapping.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/parquet.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/tests/abstract/__init__.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/tests/abstract/copy.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/tests/abstract/get.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec/tests/abstract/put.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec.egg-info/dependency_links.txt +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec.egg-info/not-zip-safe +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec.egg-info/requires.txt +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/fsspec.egg-info/top_level.txt +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/requirements.txt +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/setup.py +0 -0
- {fsspec-2023.10.0 → fsspec-2023.12.0}/versioneer.py +0 -0
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2023-
|
|
11
|
+
"date": "2023-12-02T20:51:30-0500",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "2023.
|
|
14
|
+
"full-revisionid": "5cf9cd952c5d276835d3caef9c32fcf69d55b10c",
|
|
15
|
+
"version": "2023.12.0"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -15,7 +15,7 @@ from .callbacks import _DEFAULT_CALLBACK
|
|
|
15
15
|
from .exceptions import FSTimeoutError
|
|
16
16
|
from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep
|
|
17
17
|
from .spec import AbstractBufferedFile, AbstractFileSystem
|
|
18
|
-
from .utils import is_exception, other_paths
|
|
18
|
+
from .utils import glob_translate, is_exception, other_paths
|
|
19
19
|
|
|
20
20
|
private = re.compile("_[^_]")
|
|
21
21
|
iothread = [None] # dedicated fsspec IO thread
|
|
@@ -106,7 +106,7 @@ def sync(loop, func, *args, timeout=None, **kwargs):
|
|
|
106
106
|
|
|
107
107
|
|
|
108
108
|
def sync_wrapper(func, obj=None):
|
|
109
|
-
"""Given a function, make so can be called in
|
|
109
|
+
"""Given a function, make so can be called in blocking contexts
|
|
110
110
|
|
|
111
111
|
Leave obj=None if defining within a class. Pass the instance if attaching
|
|
112
112
|
as an attribute of the instance.
|
|
@@ -467,6 +467,16 @@ class AsyncFileSystem(AbstractFileSystem):
|
|
|
467
467
|
on_error="return",
|
|
468
468
|
**kwargs,
|
|
469
469
|
):
|
|
470
|
+
"""Get the contents of byte ranges from one or more files
|
|
471
|
+
|
|
472
|
+
Parameters
|
|
473
|
+
----------
|
|
474
|
+
paths: list
|
|
475
|
+
A list of of filepaths on this filesystems
|
|
476
|
+
starts, ends: int or list
|
|
477
|
+
Bytes limits of the read. If using a single int, the same value will be
|
|
478
|
+
used to read all the specified files.
|
|
479
|
+
"""
|
|
470
480
|
# TODO: on_error
|
|
471
481
|
if max_gap is not None:
|
|
472
482
|
# use utils.merge_offset_ranges
|
|
@@ -476,7 +486,7 @@ class AsyncFileSystem(AbstractFileSystem):
|
|
|
476
486
|
if not isinstance(starts, Iterable):
|
|
477
487
|
starts = [starts] * len(paths)
|
|
478
488
|
if not isinstance(ends, Iterable):
|
|
479
|
-
ends = [
|
|
489
|
+
ends = [ends] * len(paths)
|
|
480
490
|
if len(starts) != len(paths) or len(ends) != len(paths):
|
|
481
491
|
raise ValueError
|
|
482
492
|
coros = [
|
|
@@ -662,9 +672,9 @@ class AsyncFileSystem(AbstractFileSystem):
|
|
|
662
672
|
[self._size(p) for p in paths], batch_size=batch_size
|
|
663
673
|
)
|
|
664
674
|
|
|
665
|
-
async def _exists(self, path):
|
|
675
|
+
async def _exists(self, path, **kwargs):
|
|
666
676
|
try:
|
|
667
|
-
await self._info(path)
|
|
677
|
+
await self._info(path, **kwargs)
|
|
668
678
|
return True
|
|
669
679
|
except FileNotFoundError:
|
|
670
680
|
return False
|
|
@@ -735,8 +745,12 @@ class AsyncFileSystem(AbstractFileSystem):
|
|
|
735
745
|
|
|
736
746
|
import re
|
|
737
747
|
|
|
738
|
-
|
|
748
|
+
seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
|
|
749
|
+
ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash
|
|
739
750
|
path = self._strip_protocol(path)
|
|
751
|
+
append_slash_to_dirname = ends_with_sep or path.endswith(
|
|
752
|
+
tuple(sep + "**" for sep in seps)
|
|
753
|
+
)
|
|
740
754
|
idx_star = path.find("*") if path.find("*") >= 0 else len(path)
|
|
741
755
|
idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
|
|
742
756
|
idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
|
|
@@ -746,11 +760,11 @@ class AsyncFileSystem(AbstractFileSystem):
|
|
|
746
760
|
detail = kwargs.pop("detail", False)
|
|
747
761
|
|
|
748
762
|
if not has_magic(path):
|
|
749
|
-
if await self._exists(path):
|
|
763
|
+
if await self._exists(path, **kwargs):
|
|
750
764
|
if not detail:
|
|
751
765
|
return [path]
|
|
752
766
|
else:
|
|
753
|
-
return {path: await self._info(path)}
|
|
767
|
+
return {path: await self._info(path, **kwargs)}
|
|
754
768
|
else:
|
|
755
769
|
if not detail:
|
|
756
770
|
return [] # glob of non-existent returns empty
|
|
@@ -775,46 +789,22 @@ class AsyncFileSystem(AbstractFileSystem):
|
|
|
775
789
|
allpaths = await self._find(
|
|
776
790
|
root, maxdepth=depth, withdirs=True, detail=True, **kwargs
|
|
777
791
|
)
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
# See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
|
|
781
|
-
# for shell globbing details.
|
|
782
|
-
pattern = (
|
|
783
|
-
"^"
|
|
784
|
-
+ (
|
|
785
|
-
path.replace("\\", r"\\")
|
|
786
|
-
.replace(".", r"\.")
|
|
787
|
-
.replace("+", r"\+")
|
|
788
|
-
.replace("//", "/")
|
|
789
|
-
.replace("(", r"\(")
|
|
790
|
-
.replace(")", r"\)")
|
|
791
|
-
.replace("|", r"\|")
|
|
792
|
-
.replace("^", r"\^")
|
|
793
|
-
.replace("$", r"\$")
|
|
794
|
-
.replace("{", r"\{")
|
|
795
|
-
.replace("}", r"\}")
|
|
796
|
-
.rstrip("/")
|
|
797
|
-
.replace("?", ".")
|
|
798
|
-
)
|
|
799
|
-
+ "$"
|
|
800
|
-
)
|
|
801
|
-
pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern)
|
|
802
|
-
pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern)
|
|
803
|
-
pattern = re.sub("[*]", "[^/]*", pattern)
|
|
804
|
-
pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern)
|
|
805
|
-
pattern = re.sub("=DOUBLE_STARS=", ".*", pattern)
|
|
792
|
+
|
|
793
|
+
pattern = glob_translate(path + ("/" if ends_with_sep else ""))
|
|
806
794
|
pattern = re.compile(pattern)
|
|
795
|
+
|
|
807
796
|
out = {
|
|
808
|
-
p:
|
|
809
|
-
for p in sorted(allpaths)
|
|
810
|
-
if pattern.match(
|
|
797
|
+
p: info
|
|
798
|
+
for p, info in sorted(allpaths.items())
|
|
799
|
+
if pattern.match(
|
|
800
|
+
(
|
|
801
|
+
p + "/"
|
|
802
|
+
if append_slash_to_dirname and info["type"] == "directory"
|
|
803
|
+
else p
|
|
804
|
+
)
|
|
805
|
+
)
|
|
811
806
|
}
|
|
812
807
|
|
|
813
|
-
# Return directories only when the glob end by a slash
|
|
814
|
-
# This is needed for posix glob compliance
|
|
815
|
-
if ends:
|
|
816
|
-
out = {k: v for k, v in out.items() if v["type"] == "directory"}
|
|
817
|
-
|
|
818
808
|
if detail:
|
|
819
809
|
return out
|
|
820
810
|
else:
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import io
|
|
2
4
|
import logging
|
|
3
5
|
import os
|
|
4
6
|
import re
|
|
5
7
|
from glob import has_magic
|
|
8
|
+
from pathlib import Path
|
|
6
9
|
|
|
7
10
|
# for backwards compat, we export cache things from here too
|
|
8
11
|
from .caching import ( # noqa: F401
|
|
@@ -290,7 +293,11 @@ def open_files(
|
|
|
290
293
|
fs.auto_mkdir = auto_mkdir
|
|
291
294
|
elif "r" not in mode and auto_mkdir:
|
|
292
295
|
parents = {fs._parent(path) for path in paths}
|
|
293
|
-
|
|
296
|
+
for parent in parents:
|
|
297
|
+
try:
|
|
298
|
+
fs.makedirs(parent, exist_ok=True)
|
|
299
|
+
except PermissionError:
|
|
300
|
+
pass
|
|
294
301
|
return OpenFiles(
|
|
295
302
|
[
|
|
296
303
|
OpenFile(
|
|
@@ -465,7 +472,11 @@ def open(
|
|
|
465
472
|
return out[0]
|
|
466
473
|
|
|
467
474
|
|
|
468
|
-
def open_local(
|
|
475
|
+
def open_local(
|
|
476
|
+
url: str | list[str] | Path | list[Path],
|
|
477
|
+
mode: str = "rb",
|
|
478
|
+
**storage_options: dict,
|
|
479
|
+
) -> str | list[str]:
|
|
469
480
|
"""Open file(s) which can be resolved to local
|
|
470
481
|
|
|
471
482
|
For files which either are local, or get downloaded upon open
|
|
@@ -489,7 +500,7 @@ def open_local(url, mode="rb", **storage_options):
|
|
|
489
500
|
)
|
|
490
501
|
with of as files:
|
|
491
502
|
paths = [f.name for f in files]
|
|
492
|
-
if isinstance(url, str) and not has_magic(url):
|
|
503
|
+
if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path):
|
|
493
504
|
return paths[0]
|
|
494
505
|
return paths
|
|
495
506
|
|
|
@@ -510,6 +521,8 @@ def split_protocol(urlpath):
|
|
|
510
521
|
if len(protocol) > 1:
|
|
511
522
|
# excludes Windows paths
|
|
512
523
|
return protocol, path
|
|
524
|
+
if ":" in urlpath and urlpath.find(":") > 1:
|
|
525
|
+
return urlpath.split(":", 1)
|
|
513
526
|
return None, urlpath
|
|
514
527
|
|
|
515
528
|
|
|
@@ -171,6 +171,10 @@ class GenericFileSystem(AsyncFileSystem):
|
|
|
171
171
|
self.method = default_method
|
|
172
172
|
super().__init__(**kwargs)
|
|
173
173
|
|
|
174
|
+
def _parent(self, path):
|
|
175
|
+
fs = _resolve_fs(path, self.method)
|
|
176
|
+
return fs.unstrip_protocol(fs._parent(path))
|
|
177
|
+
|
|
174
178
|
def _strip_protocol(self, path):
|
|
175
179
|
# normalization only
|
|
176
180
|
fs = _resolve_fs(path, self.method)
|
|
@@ -242,7 +242,7 @@ class FileSelector(SigSlot):
|
|
|
242
242
|
else:
|
|
243
243
|
self.init_protocol, url = "file", os.getcwd()
|
|
244
244
|
self.init_url = url
|
|
245
|
-
self.init_kwargs = kwargs or "{}"
|
|
245
|
+
self.init_kwargs = (kwargs if isinstance(kwargs, str) else str(kwargs)) or "{}"
|
|
246
246
|
self.filters = filters
|
|
247
247
|
self.ignore = [re.compile(i) for i in ignore or []]
|
|
248
248
|
self._fs = None
|
|
@@ -17,6 +17,7 @@ from fsspec.exceptions import BlocksizeMismatchError
|
|
|
17
17
|
from fsspec.implementations.cache_mapper import create_cache_mapper
|
|
18
18
|
from fsspec.implementations.cache_metadata import CacheMetadata
|
|
19
19
|
from fsspec.spec import AbstractBufferedFile
|
|
20
|
+
from fsspec.transaction import Transaction
|
|
20
21
|
from fsspec.utils import infer_compression
|
|
21
22
|
|
|
22
23
|
if TYPE_CHECKING:
|
|
@@ -25,6 +26,16 @@ if TYPE_CHECKING:
|
|
|
25
26
|
logger = logging.getLogger("fsspec.cached")
|
|
26
27
|
|
|
27
28
|
|
|
29
|
+
class WriteCachedTransaction(Transaction):
|
|
30
|
+
def complete(self, commit=True):
|
|
31
|
+
rpaths = [f.path for f in self.files]
|
|
32
|
+
lpaths = [f.fn for f in self.files]
|
|
33
|
+
if commit:
|
|
34
|
+
self.fs.put(lpaths, rpaths)
|
|
35
|
+
# else remove?
|
|
36
|
+
self.fs._intrans = False
|
|
37
|
+
|
|
38
|
+
|
|
28
39
|
class CachingFileSystem(AbstractFileSystem):
|
|
29
40
|
"""Locally caching filesystem, layer over any other FS
|
|
30
41
|
|
|
@@ -355,6 +366,9 @@ class CachingFileSystem(AbstractFileSystem):
|
|
|
355
366
|
self.save_cache()
|
|
356
367
|
return f
|
|
357
368
|
|
|
369
|
+
def _parent(self, path):
|
|
370
|
+
return self.fs._parent(path)
|
|
371
|
+
|
|
358
372
|
def hash_name(self, path: str, *args: Any) -> str:
|
|
359
373
|
# Kept for backward compatibility with downstream libraries.
|
|
360
374
|
# Ignores extra arguments, previously same_name boolean.
|
|
@@ -390,6 +404,7 @@ class CachingFileSystem(AbstractFileSystem):
|
|
|
390
404
|
"open",
|
|
391
405
|
"cat",
|
|
392
406
|
"cat_file",
|
|
407
|
+
"cat_ranges",
|
|
393
408
|
"get",
|
|
394
409
|
"read_block",
|
|
395
410
|
"tail",
|
|
@@ -411,6 +426,10 @@ class CachingFileSystem(AbstractFileSystem):
|
|
|
411
426
|
"__eq__",
|
|
412
427
|
"to_json",
|
|
413
428
|
"cache_size",
|
|
429
|
+
"pipe_file",
|
|
430
|
+
"pipe",
|
|
431
|
+
"start_transaction",
|
|
432
|
+
"end_transaction",
|
|
414
433
|
]:
|
|
415
434
|
# all the methods defined in this class. Note `open` here, since
|
|
416
435
|
# it calls `_open`, but is actually in superclass
|
|
@@ -419,7 +438,10 @@ class CachingFileSystem(AbstractFileSystem):
|
|
|
419
438
|
)
|
|
420
439
|
if item in ["__reduce_ex__"]:
|
|
421
440
|
raise AttributeError
|
|
422
|
-
if item in ["
|
|
441
|
+
if item in ["transaction"]:
|
|
442
|
+
# property
|
|
443
|
+
return type(self).transaction.__get__(self)
|
|
444
|
+
if item in ["_cache", "transaction_type"]:
|
|
423
445
|
# class attributes
|
|
424
446
|
return getattr(type(self), item)
|
|
425
447
|
if item == "__class__":
|
|
@@ -508,7 +530,13 @@ class WholeFileCacheFileSystem(CachingFileSystem):
|
|
|
508
530
|
self._mkcache()
|
|
509
531
|
else:
|
|
510
532
|
return [
|
|
511
|
-
LocalTempFile(
|
|
533
|
+
LocalTempFile(
|
|
534
|
+
self.fs,
|
|
535
|
+
path,
|
|
536
|
+
mode=open_files.mode,
|
|
537
|
+
fn=os.path.join(self.storage[-1], self._mapper(path)),
|
|
538
|
+
)
|
|
539
|
+
for path in paths
|
|
512
540
|
]
|
|
513
541
|
|
|
514
542
|
if self.compression:
|
|
@@ -621,7 +649,8 @@ class WholeFileCacheFileSystem(CachingFileSystem):
|
|
|
621
649
|
def _open(self, path, mode="rb", **kwargs):
|
|
622
650
|
path = self._strip_protocol(path)
|
|
623
651
|
if "r" not in mode:
|
|
624
|
-
|
|
652
|
+
fn = self._make_local_details(path)
|
|
653
|
+
return LocalTempFile(self, path, mode=mode, fn=fn)
|
|
625
654
|
detail = self._check_file(path)
|
|
626
655
|
if detail:
|
|
627
656
|
detail, fn = detail
|
|
@@ -688,6 +717,7 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
|
|
|
688
717
|
|
|
689
718
|
protocol = "simplecache"
|
|
690
719
|
local_file = True
|
|
720
|
+
transaction_type = WriteCachedTransaction
|
|
691
721
|
|
|
692
722
|
def __init__(self, **kwargs):
|
|
693
723
|
kw = kwargs.copy()
|
|
@@ -712,16 +742,46 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
|
|
|
712
742
|
def load_cache(self):
|
|
713
743
|
pass
|
|
714
744
|
|
|
745
|
+
def pipe_file(self, path, value=None, **kwargs):
|
|
746
|
+
if self._intrans:
|
|
747
|
+
with self.open(path, "wb") as f:
|
|
748
|
+
f.write(value)
|
|
749
|
+
else:
|
|
750
|
+
super().pipe_file(path, value)
|
|
751
|
+
|
|
752
|
+
def pipe(self, path, value=None, **kwargs):
|
|
753
|
+
if isinstance(path, str):
|
|
754
|
+
self.pipe_file(self._strip_protocol(path), value, **kwargs)
|
|
755
|
+
elif isinstance(path, dict):
|
|
756
|
+
for k, v in path.items():
|
|
757
|
+
self.pipe_file(self._strip_protocol(k), v, **kwargs)
|
|
758
|
+
else:
|
|
759
|
+
raise ValueError("path must be str or dict")
|
|
760
|
+
|
|
761
|
+
def cat_ranges(
|
|
762
|
+
self, paths, starts, ends, max_gap=None, on_error="return", **kwargs
|
|
763
|
+
):
|
|
764
|
+
lpaths = [self._check_file(p) for p in paths]
|
|
765
|
+
rpaths = [p for l, p in zip(lpaths, paths) if l is False]
|
|
766
|
+
lpaths = [l for l, p in zip(lpaths, paths) if l is False]
|
|
767
|
+
self.fs.get(rpaths, lpaths)
|
|
768
|
+
return super().cat_ranges(
|
|
769
|
+
paths, starts, ends, max_gap=max_gap, on_error=on_error, **kwargs
|
|
770
|
+
)
|
|
771
|
+
|
|
715
772
|
def _open(self, path, mode="rb", **kwargs):
|
|
716
773
|
path = self._strip_protocol(path)
|
|
774
|
+
sha = self._mapper(path)
|
|
717
775
|
|
|
718
776
|
if "r" not in mode:
|
|
719
|
-
|
|
777
|
+
fn = os.path.join(self.storage[-1], sha)
|
|
778
|
+
return LocalTempFile(
|
|
779
|
+
self, path, mode=mode, autocommit=not self._intrans, fn=fn
|
|
780
|
+
)
|
|
720
781
|
fn = self._check_file(path)
|
|
721
782
|
if fn:
|
|
722
783
|
return open(fn, mode)
|
|
723
784
|
|
|
724
|
-
sha = self._mapper(path)
|
|
725
785
|
fn = os.path.join(self.storage[-1], sha)
|
|
726
786
|
logger.debug("Copying %s to local cache", path)
|
|
727
787
|
kwargs["mode"] = mode
|
|
@@ -752,13 +812,9 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
|
|
|
752
812
|
class LocalTempFile:
|
|
753
813
|
"""A temporary local file, which will be uploaded on commit"""
|
|
754
814
|
|
|
755
|
-
def __init__(self, fs, path, fn
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
self.fh = open(fn, mode)
|
|
759
|
-
else:
|
|
760
|
-
fd, self.fn = tempfile.mkstemp()
|
|
761
|
-
self.fh = open(fd, mode)
|
|
815
|
+
def __init__(self, fs, path, fn, mode="wb", autocommit=True, seek=0):
|
|
816
|
+
self.fn = fn
|
|
817
|
+
self.fh = open(fn, mode)
|
|
762
818
|
self.mode = mode
|
|
763
819
|
if seek:
|
|
764
820
|
self.fh.seek(seek)
|
|
@@ -768,10 +824,10 @@ class LocalTempFile:
|
|
|
768
824
|
self.autocommit = autocommit
|
|
769
825
|
|
|
770
826
|
def __reduce__(self):
|
|
771
|
-
# always open in
|
|
827
|
+
# always open in r+b to allow continuing writing at a location
|
|
772
828
|
return (
|
|
773
829
|
LocalTempFile,
|
|
774
|
-
(self.fs, self.path, self.fn, "
|
|
830
|
+
(self.fs, self.path, self.fn, "r+b", self.autocommit, self.tell()),
|
|
775
831
|
)
|
|
776
832
|
|
|
777
833
|
def __enter__(self):
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import io
|
|
3
|
+
from urllib.parse import unquote
|
|
4
|
+
|
|
5
|
+
from fsspec import AbstractFileSystem
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class DataFileSystem(AbstractFileSystem):
|
|
9
|
+
"""A handy decoder for data-URLs
|
|
10
|
+
|
|
11
|
+
Example
|
|
12
|
+
-------
|
|
13
|
+
>>> with fsspec.open("data:,Hello%2C%20World%21") as f:
|
|
14
|
+
... print(f.read())
|
|
15
|
+
b"Hello, World!"
|
|
16
|
+
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
protocol = "data"
|
|
20
|
+
|
|
21
|
+
def __init__(self, **kwargs):
|
|
22
|
+
"""No parameters for this filesystem"""
|
|
23
|
+
super().__init__(**kwargs)
|
|
24
|
+
|
|
25
|
+
def cat_file(self, path, start=None, end=None, **kwargs):
|
|
26
|
+
pref, data = path.split(",", 1)
|
|
27
|
+
if pref.endswith("base64"):
|
|
28
|
+
return base64.b64decode(data)[start:end]
|
|
29
|
+
return unquote(data).encode()[start:end]
|
|
30
|
+
|
|
31
|
+
def info(self, path, **kwargs):
|
|
32
|
+
pref, name = path.split(",", 1)
|
|
33
|
+
data = self.cat_file(path)
|
|
34
|
+
mime = pref.split(":", 1)[1].split(";", 1)[0]
|
|
35
|
+
return {"name": name, "size": len(data), "type": "file", "mimetype": mime}
|
|
36
|
+
|
|
37
|
+
def _open(
|
|
38
|
+
self,
|
|
39
|
+
path,
|
|
40
|
+
mode="rb",
|
|
41
|
+
block_size=None,
|
|
42
|
+
autocommit=True,
|
|
43
|
+
cache_options=None,
|
|
44
|
+
**kwargs,
|
|
45
|
+
):
|
|
46
|
+
if "r" not in mode:
|
|
47
|
+
raise ValueError("Read only filesystem")
|
|
48
|
+
return io.BytesIO(self.cat_file(path))
|
|
@@ -14,7 +14,13 @@ from fsspec.asyn import AbstractAsyncStreamedFile, AsyncFileSystem, sync, sync_w
|
|
|
14
14
|
from fsspec.callbacks import _DEFAULT_CALLBACK
|
|
15
15
|
from fsspec.exceptions import FSTimeoutError
|
|
16
16
|
from fsspec.spec import AbstractBufferedFile
|
|
17
|
-
from fsspec.utils import
|
|
17
|
+
from fsspec.utils import (
|
|
18
|
+
DEFAULT_BLOCK_SIZE,
|
|
19
|
+
glob_translate,
|
|
20
|
+
isfilelike,
|
|
21
|
+
nullcontext,
|
|
22
|
+
tokenize,
|
|
23
|
+
)
|
|
18
24
|
|
|
19
25
|
from ..caching import AllBytes
|
|
20
26
|
|
|
@@ -441,8 +447,9 @@ class HTTPFileSystem(AsyncFileSystem):
|
|
|
441
447
|
raise ValueError("maxdepth must be at least 1")
|
|
442
448
|
import re
|
|
443
449
|
|
|
444
|
-
|
|
450
|
+
ends_with_slash = path.endswith("/") # _strip_protocol strips trailing slash
|
|
445
451
|
path = self._strip_protocol(path)
|
|
452
|
+
append_slash_to_dirname = ends_with_slash or path.endswith("/**")
|
|
446
453
|
idx_star = path.find("*") if path.find("*") >= 0 else len(path)
|
|
447
454
|
idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
|
|
448
455
|
|
|
@@ -451,11 +458,11 @@ class HTTPFileSystem(AsyncFileSystem):
|
|
|
451
458
|
detail = kwargs.pop("detail", False)
|
|
452
459
|
|
|
453
460
|
if not has_magic(path):
|
|
454
|
-
if await self._exists(path):
|
|
461
|
+
if await self._exists(path, **kwargs):
|
|
455
462
|
if not detail:
|
|
456
463
|
return [path]
|
|
457
464
|
else:
|
|
458
|
-
return {path: await self._info(path)}
|
|
465
|
+
return {path: await self._info(path, **kwargs)}
|
|
459
466
|
else:
|
|
460
467
|
if not detail:
|
|
461
468
|
return [] # glob of non-existent returns empty
|
|
@@ -480,45 +487,22 @@ class HTTPFileSystem(AsyncFileSystem):
|
|
|
480
487
|
allpaths = await self._find(
|
|
481
488
|
root, maxdepth=depth, withdirs=True, detail=True, **kwargs
|
|
482
489
|
)
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
# See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
|
|
486
|
-
# for shell globbing details.
|
|
487
|
-
pattern = (
|
|
488
|
-
"^"
|
|
489
|
-
+ (
|
|
490
|
-
path.replace("\\", r"\\")
|
|
491
|
-
.replace(".", r"\.")
|
|
492
|
-
.replace("+", r"\+")
|
|
493
|
-
.replace("//", "/")
|
|
494
|
-
.replace("(", r"\(")
|
|
495
|
-
.replace(")", r"\)")
|
|
496
|
-
.replace("|", r"\|")
|
|
497
|
-
.replace("^", r"\^")
|
|
498
|
-
.replace("$", r"\$")
|
|
499
|
-
.replace("{", r"\{")
|
|
500
|
-
.replace("}", r"\}")
|
|
501
|
-
.rstrip("/")
|
|
502
|
-
)
|
|
503
|
-
+ "$"
|
|
504
|
-
)
|
|
505
|
-
pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern)
|
|
506
|
-
pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern)
|
|
507
|
-
pattern = re.sub("[*]", "[^/]*", pattern)
|
|
508
|
-
pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern)
|
|
509
|
-
pattern = re.sub("=DOUBLE_STARS=", ".*", pattern)
|
|
490
|
+
|
|
491
|
+
pattern = glob_translate(path + ("/" if ends_with_slash else ""))
|
|
510
492
|
pattern = re.compile(pattern)
|
|
493
|
+
|
|
511
494
|
out = {
|
|
512
|
-
p:
|
|
513
|
-
for p in sorted(allpaths)
|
|
514
|
-
if pattern.match(
|
|
495
|
+
p: info
|
|
496
|
+
for p, info in sorted(allpaths.items())
|
|
497
|
+
if pattern.match(
|
|
498
|
+
(
|
|
499
|
+
p + "/"
|
|
500
|
+
if append_slash_to_dirname and info["type"] == "directory"
|
|
501
|
+
else p
|
|
502
|
+
)
|
|
503
|
+
)
|
|
515
504
|
}
|
|
516
505
|
|
|
517
|
-
# Return directories only when the glob end by a slash
|
|
518
|
-
# This is needed for posix glob compliance
|
|
519
|
-
if ends:
|
|
520
|
-
out = {k: v for k, v in out.items() if v["type"] == "directory"}
|
|
521
|
-
|
|
522
506
|
if detail:
|
|
523
507
|
return out
|
|
524
508
|
else:
|
|
@@ -818,7 +802,7 @@ async def get_range(session, url, start, end, file=None, **kwargs):
|
|
|
818
802
|
async with r:
|
|
819
803
|
out = await r.read()
|
|
820
804
|
if file:
|
|
821
|
-
with open(file, "
|
|
805
|
+
with open(file, "r+b") as f:
|
|
822
806
|
f.seek(start)
|
|
823
807
|
f.write(out)
|
|
824
808
|
else:
|
|
@@ -855,7 +839,10 @@ async def _file_info(url, session, size_policy="head", **kwargs):
|
|
|
855
839
|
if "Content-Length" in r.headers:
|
|
856
840
|
# Some servers may choose to ignore Accept-Encoding and return
|
|
857
841
|
# compressed content, in which case the returned size is unreliable.
|
|
858
|
-
if r.headers.
|
|
842
|
+
if "Content-Encoding" not in r.headers or r.headers["Content-Encoding"] in [
|
|
843
|
+
"identity",
|
|
844
|
+
"",
|
|
845
|
+
]:
|
|
859
846
|
info["size"] = int(r.headers["Content-Length"])
|
|
860
847
|
elif "Content-Range" in r.headers:
|
|
861
848
|
info["size"] = int(r.headers["Content-Range"].split("/")[1])
|
|
@@ -175,7 +175,7 @@ class MemoryFileSystem(AbstractFileSystem):
|
|
|
175
175
|
parent = self._parent(parent)
|
|
176
176
|
if self.isfile(parent):
|
|
177
177
|
raise FileExistsError(parent)
|
|
178
|
-
if mode in ["rb", "ab", "
|
|
178
|
+
if mode in ["rb", "ab", "r+b"]:
|
|
179
179
|
if path in self.store:
|
|
180
180
|
f = self.store[path]
|
|
181
181
|
if mode == "ab":
|