fsspec 2023.10.0__py3-none-any.whl → 2024.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/_version.py +3 -3
- fsspec/archive.py +4 -4
- fsspec/asyn.py +43 -53
- fsspec/caching.py +1 -1
- fsspec/callbacks.py +98 -12
- fsspec/compression.py +3 -3
- fsspec/core.py +16 -3
- fsspec/exceptions.py +0 -4
- fsspec/generic.py +11 -4
- fsspec/gui.py +4 -3
- fsspec/implementations/arrow.py +9 -0
- fsspec/implementations/cache_mapper.py +2 -6
- fsspec/implementations/cached.py +92 -18
- fsspec/implementations/data.py +48 -0
- fsspec/implementations/dbfs.py +14 -4
- fsspec/implementations/dirfs.py +6 -0
- fsspec/implementations/ftp.py +18 -13
- fsspec/implementations/github.py +17 -5
- fsspec/implementations/http.py +42 -51
- fsspec/implementations/libarchive.py +2 -3
- fsspec/implementations/local.py +11 -4
- fsspec/implementations/memory.py +2 -2
- fsspec/implementations/reference.py +127 -56
- fsspec/implementations/sftp.py +6 -5
- fsspec/implementations/smb.py +0 -1
- fsspec/implementations/tar.py +2 -1
- fsspec/implementations/webhdfs.py +46 -5
- fsspec/implementations/zip.py +11 -3
- fsspec/parquet.py +3 -5
- fsspec/registry.py +2 -1
- fsspec/spec.py +51 -61
- fsspec/tests/abstract/common.py +5 -5
- fsspec/tests/abstract/copy.py +21 -7
- fsspec/tests/abstract/put.py +21 -7
- fsspec/transaction.py +8 -4
- fsspec/utils.py +114 -1
- {fsspec-2023.10.0.dist-info → fsspec-2024.2.0.dist-info}/METADATA +1 -2
- fsspec-2024.2.0.dist-info/RECORD +54 -0
- {fsspec-2023.10.0.dist-info → fsspec-2024.2.0.dist-info}/WHEEL +1 -1
- fsspec-2023.10.0.dist-info/RECORD +0 -53
- {fsspec-2023.10.0.dist-info → fsspec-2024.2.0.dist-info}/LICENSE +0 -0
- {fsspec-2023.10.0.dist-info → fsspec-2024.2.0.dist-info}/top_level.txt +0 -0
fsspec/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json

 version_json = '''
 {
- "date": "
+ "date": "2024-02-04T20:21:42-0500",
  "dirty": false,
  "error": null,
- "full-revisionid": "
- "version": "2023.10.0"
+ "full-revisionid": "5dc364e13b63609717d77b7361e80cfa64e3b8fd",
+ "version": "2024.2.0"
 }
 ''' # END VERSION_JSON

fsspec/archive.py
CHANGED
@@ -38,7 +38,7 @@ class AbstractArchiveFileSystem(AbstractFileSystem):
         self._get_dirs()
         path = self._strip_protocol(path)
         if path in {"", "/"} and self.dir_cache:
-            return {"name": "
+            return {"name": "", "type": "directory", "size": 0}
         if path in self.dir_cache:
             return self.dir_cache[path]
         elif path + "/" in self.dir_cache:
@@ -64,10 +64,10 @@ class AbstractArchiveFileSystem(AbstractFileSystem):
                 # root directory entry
                 ppath = p.rstrip("/").split("/", 1)[0]
                 if ppath not in paths:
-                    out = {"name": ppath
+                    out = {"name": ppath, "size": 0, "type": "directory"}
                     paths[ppath] = out
-        out = sorted(paths.values(), key=lambda _: _["name"])
         if detail:
+            out = sorted(paths.values(), key=lambda _: _["name"])
             return out
         else:
-            return
+            return sorted(paths)
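
For context (not part of the diff): with this change, listing an archive filesystem with detail=False returns the sorted entry names, while detail=True returns the info dicts sorted by name. A minimal sketch, assuming a local archive named example.zip exists:

```python
import fsspec

# "example.zip" is a placeholder archive for illustration only.
fs = fsspec.filesystem("zip", fo="example.zip")

# detail=False now yields sorted member names rather than an unsorted listing.
print(fs.ls("/", detail=False))
```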
fsspec/asyn.py
CHANGED
@@ -11,11 +11,11 @@ from contextlib import contextmanager
 from glob import has_magic
 from typing import TYPE_CHECKING, Iterable

-from .callbacks import _DEFAULT_CALLBACK
+from .callbacks import DEFAULT_CALLBACK
 from .exceptions import FSTimeoutError
 from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep
 from .spec import AbstractBufferedFile, AbstractFileSystem
-from .utils import is_exception, other_paths
+from .utils import glob_translate, is_exception, other_paths

 private = re.compile("_[^_]")
 iothread = [None]  # dedicated fsspec IO thread
@@ -106,7 +106,7 @@ def sync(loop, func, *args, timeout=None, **kwargs):


 def sync_wrapper(func, obj=None):
-    """Given a function, make so can be called in
+    """Given a function, make so can be called in blocking contexts

     Leave obj=None if defining within a class. Pass the instance if attaching
     as an attribute of the instance.
@@ -205,7 +205,7 @@ def running_async() -> bool:
 async def _run_coros_in_chunks(
     coros,
     batch_size=None,
-    callback=_DEFAULT_CALLBACK,
+    callback=DEFAULT_CALLBACK,
     timeout=None,
     return_exceptions=False,
     nofiles=False,
@@ -245,7 +245,7 @@ async def _run_coros_in_chunks(
             asyncio.Task(asyncio.wait_for(c, timeout=timeout))
             for c in coros[start : start + batch_size]
         ]
-        if callback is not _DEFAULT_CALLBACK:
+        if callback is not DEFAULT_CALLBACK:
             [
                 t.add_done_callback(lambda *_, **__: callback.relative_update(1))
                 for t in chunk
@@ -467,6 +467,16 @@ class AsyncFileSystem(AbstractFileSystem):
         on_error="return",
         **kwargs,
     ):
+        """Get the contents of byte ranges from one or more files
+
+        Parameters
+        ----------
+        paths: list
+            A list of of filepaths on this filesystems
+        starts, ends: int or list
+            Bytes limits of the read. If using a single int, the same value will be
+            used to read all the specified files.
+        """
         # TODO: on_error
         if max_gap is not None:
             # use utils.merge_offset_ranges
@@ -476,7 +486,7 @@ class AsyncFileSystem(AbstractFileSystem):
         if not isinstance(starts, Iterable):
             starts = [starts] * len(paths)
         if not isinstance(ends, Iterable):
-            ends = [
+            ends = [ends] * len(paths)
         if len(starts) != len(paths) or len(ends) != len(paths):
             raise ValueError
         coros = [
@@ -496,7 +506,7 @@ class AsyncFileSystem(AbstractFileSystem):
         lpath,
         rpath,
         recursive=False,
-        callback=_DEFAULT_CALLBACK,
+        callback=DEFAULT_CALLBACK,
         batch_size=None,
         maxdepth=None,
         **kwargs,
@@ -558,8 +568,8 @@ class AsyncFileSystem(AbstractFileSystem):
         coros = []
         callback.set_size(len(file_pairs))
         for lfile, rfile in file_pairs:
-            callback.
-            coros.append(
+            put_file = callback.branch_coro(self._put_file)
+            coros.append(put_file(lfile, rfile, **kwargs))

         return await _run_coros_in_chunks(
             coros, batch_size=batch_size, callback=callback
@@ -573,7 +583,7 @@ class AsyncFileSystem(AbstractFileSystem):
         rpath,
         lpath,
         recursive=False,
-        callback=_DEFAULT_CALLBACK,
+        callback=DEFAULT_CALLBACK,
         maxdepth=None,
         **kwargs,
     ):
@@ -635,8 +645,8 @@ class AsyncFileSystem(AbstractFileSystem):
         coros = []
         callback.set_size(len(lpaths))
         for lpath, rpath in zip(lpaths, rpaths):
-            callback.
-            coros.append(
+            get_file = callback.branch_coro(self._get_file)
+            coros.append(get_file(rpath, lpath, **kwargs))
         return await _run_coros_in_chunks(
             coros, batch_size=batch_size, callback=callback
         )
@@ -662,9 +672,9 @@ class AsyncFileSystem(AbstractFileSystem):
             [self._size(p) for p in paths], batch_size=batch_size
         )

-    async def _exists(self, path):
+    async def _exists(self, path, **kwargs):
         try:
-            await self._info(path)
+            await self._info(path, **kwargs)
             return True
         except FileNotFoundError:
             return False
@@ -735,8 +745,12 @@ class AsyncFileSystem(AbstractFileSystem):

         import re

-
+        seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
+        ends_with_sep = path.endswith(seps)  # _strip_protocol strips trailing slash
         path = self._strip_protocol(path)
+        append_slash_to_dirname = ends_with_sep or path.endswith(
+            tuple(sep + "**" for sep in seps)
+        )
         idx_star = path.find("*") if path.find("*") >= 0 else len(path)
         idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
         idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
@@ -746,11 +760,11 @@ class AsyncFileSystem(AbstractFileSystem):
         detail = kwargs.pop("detail", False)

         if not has_magic(path):
-            if await self._exists(path):
+            if await self._exists(path, **kwargs):
                 if not detail:
                     return [path]
                 else:
-                    return {path: await self._info(path)}
+                    return {path: await self._info(path, **kwargs)}
             else:
                 if not detail:
                     return []  # glob of non-existent returns empty
@@ -775,46 +789,22 @@ class AsyncFileSystem(AbstractFileSystem):
         allpaths = await self._find(
             root, maxdepth=depth, withdirs=True, detail=True, **kwargs
         )
-
-
-        # See https://www.gnu.org/software/bash/manual/html_node/Pattern-Matching.html
-        # for shell globbing details.
-        pattern = (
-            "^"
-            + (
-                path.replace("\\", r"\\")
-                .replace(".", r"\.")
-                .replace("+", r"\+")
-                .replace("//", "/")
-                .replace("(", r"\(")
-                .replace(")", r"\)")
-                .replace("|", r"\|")
-                .replace("^", r"\^")
-                .replace("$", r"\$")
-                .replace("{", r"\{")
-                .replace("}", r"\}")
-                .rstrip("/")
-                .replace("?", ".")
-            )
-            + "$"
-        )
-        pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern)
-        pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern)
-        pattern = re.sub("[*]", "[^/]*", pattern)
-        pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern)
-        pattern = re.sub("=DOUBLE_STARS=", ".*", pattern)
+
+        pattern = glob_translate(path + ("/" if ends_with_sep else ""))
         pattern = re.compile(pattern)
+
         out = {
-            p:
-            for p in sorted(allpaths)
-            if pattern.match(
+            p: info
+            for p, info in sorted(allpaths.items())
+            if pattern.match(
+                (
+                    p + "/"
+                    if append_slash_to_dirname and info["type"] == "directory"
+                    else p
+                )
+            )
         }

-        # Return directories only when the glob end by a slash
-        # This is needed for posix glob compliance
-        if ends:
-            out = {k: v for k, v in out.items() if v["type"] == "directory"}
-
         if detail:
             return out
         else:
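
For context (not part of the diff): the rewritten `_glob` now builds its regular expression with the new `fsspec.utils.glob_translate` helper (see the `fsspec/utils.py` entry, +114 lines) instead of the hand-rolled escaping it replaces. A minimal sketch of that translation step, assuming the helper behaves like CPython's `glob.translate`:

```python
import re

from fsspec.utils import glob_translate  # helper added in 2024.2.0

# Turn a shell-style glob into a regex and filter paths with it,
# roughly as AsyncFileSystem._glob now does.
pattern = re.compile(glob_translate("data/**/*.csv"))

paths = ["data/2024/part-0.csv", "data/readme.txt"]
print([p for p in paths if pattern.match(p)])  # only the .csv path matches
```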
fsspec/caching.py
CHANGED
fsspec/callbacks.py
CHANGED
@@ -1,3 +1,6 @@
+from functools import wraps
+
+
 class Callback:
     """
     Base class and interface for callback mechanism
@@ -25,6 +28,60 @@ class Callback:
         self.hooks = hooks or {}
         self.kw = kwargs

+    def __enter__(self):
+        return self
+
+    def __exit__(self, *exc_args):
+        self.close()
+
+    def close(self):
+        """Close callback."""
+
+    def branched(self, path_1, path_2, **kwargs):
+        """
+        Return callback for child transfers
+
+        If this callback is operating at a higher level, e.g., put, which may
+        trigger transfers that can also be monitored. The function returns a callback
+        that has to be passed to the child method, e.g., put_file,
+        as `callback=` argument.
+
+        The implementation uses `callback.branch` for compatibility.
+        When implementing callbacks, it is recommended to override this function instead
+        of `branch` and avoid calling `super().branched(...)`.
+
+        Prefer using this function over `branch`.
+
+        Parameters
+        ----------
+        path_1: str
+            Child's source path
+        path_2: str
+            Child's destination path
+        **kwargs:
+            Arbitrary keyword arguments
+
+        Returns
+        -------
+        callback: Callback
+            A callback instance to be passed to the child method
+        """
+        self.branch(path_1, path_2, kwargs)
+        # mutate kwargs so that we can force the caller to pass "callback=" explicitly
+        return kwargs.pop("callback", DEFAULT_CALLBACK)
+
+    def branch_coro(self, fn):
+        """
+        Wraps a coroutine, and pass a new child callback to it.
+        """
+
+        @wraps(fn)
+        async def func(path1, path2: str, **kwargs):
+            with self.branched(path1, path2, **kwargs) as child:
+                return await fn(path1, path2, callback=child, **kwargs)
+
+        return func
+
     def set_size(self, size):
         """
         Set the internal maximum size attribute
@@ -140,10 +197,10 @@

         For the special value of ``None``, return the global instance of
         ``NoOpCallback``. This is an alternative to including
-        ``callback=_DEFAULT_CALLBACK`` directly in a method signature.
+        ``callback=DEFAULT_CALLBACK`` directly in a method signature.
         """
         if maybe_callback is None:
-            return _DEFAULT_CALLBACK
+            return DEFAULT_CALLBACK
         return maybe_callback


@@ -186,7 +243,9 @@ class TqdmCallback(Callback):
     tqdm_kwargs : dict, (optional)
         Any argument accepted by the tqdm constructor.
         See the `tqdm doc <https://tqdm.github.io/docs/tqdm/#__init__>`_.
-        Will be forwarded to
+        Will be forwarded to `tqdm_cls`.
+    tqdm_cls: (optional)
+        subclass of `tqdm.tqdm`. If not passed, it will default to `tqdm.tqdm`.

     Examples
     --------
@@ -209,30 +268,57 @@
             recursive=True,
             callback=TqdmCallback(tqdm_kwargs={"desc": "Your tqdm description"}),
         )
+
+    You can also customize the progress bar by passing a subclass of `tqdm`.
+
+    .. code-block:: python
+
+        class TqdmFormat(tqdm):
+            '''Provides a `total_time` format parameter'''
+            @property
+            def format_dict(self):
+                d = super().format_dict
+                total_time = d["elapsed"] * (d["total"] or 0) / max(d["n"], 1)
+                d.update(total_time=self.format_interval(total_time) + " in total")
+                return d
+
+    >>> with TqdmCallback(
+            tqdm_kwargs={
+                "desc": "desc",
+                "bar_format": "{total_time}: {percentage:.0f}%|{bar}{r_bar}",
+            },
+            tqdm_cls=TqdmFormat,
+        ) as callback:
+            fs.upload(".", path2distant_data, recursive=True, callback=callback)
     """

     def __init__(self, tqdm_kwargs=None, *args, **kwargs):
         try:
-            import tqdm
+            from tqdm import tqdm

-            self._tqdm = tqdm
         except ImportError as exce:
             raise ImportError(
                 "Using TqdmCallback requires tqdm to be installed"
             ) from exce

+        self._tqdm_cls = kwargs.pop("tqdm_cls", tqdm)
         self._tqdm_kwargs = tqdm_kwargs or {}
+        self.tqdm = None
         super().__init__(*args, **kwargs)

-    def
-        self.tqdm
+    def call(self, *args, **kwargs):
+        if self.tqdm is None:
+            self.tqdm = self._tqdm_cls(total=self.size, **self._tqdm_kwargs)
+        self.tqdm.total = self.size
+        self.tqdm.update(self.value - self.tqdm.n)

-    def
-        self.tqdm
+    def close(self):
+        if self.tqdm is not None:
+            self.tqdm.close()
+            self.tqdm = None

     def __del__(self):
-        self.
-        self.tqdm = None
+        return self.close()


-_DEFAULT_CALLBACK = NoOpCallback()
+DEFAULT_CALLBACK = _DEFAULT_CALLBACK = NoOpCallback()
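
For context (not part of the diff): callbacks are now context managers and create one child callback per transferred file via `branched`/`branch_coro`. A minimal sketch of a custom callback against the new interface (the class and paths below are illustrative only, not part of fsspec):

```python
from fsspec.callbacks import Callback


class CountingCallback(Callback):
    """Toy callback: prints overall progress and hands out a child per file."""

    def call(self, *args, **kwargs):
        print(f"{self.value}/{self.size} files transferred")

    def branched(self, path_1, path_2, **kwargs):
        # Invoked once per child transfer (each put_file/get_file);
        # return the callback the child operation should report to.
        return CountingCallback()


# Usage sketch:
# fs.put("local_dir", "remote/dir", recursive=True, callback=CountingCallback())
```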
fsspec/compression.py
CHANGED
@@ -90,15 +90,15 @@ except ImportError:
 try:
     from lzma import LZMAFile

-    register_compression("lzma", LZMAFile, "
-    register_compression("xz", LZMAFile, "xz"
+    register_compression("lzma", LZMAFile, "lzma")
+    register_compression("xz", LZMAFile, "xz")
 except ImportError:
     pass

 try:
     import lzmaffi

-    register_compression("lzma", lzmaffi.LZMAFile, "
+    register_compression("lzma", lzmaffi.LZMAFile, "lzma", force=True)
     register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
 except ImportError:
     pass
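
For context (not part of the diff): with the corrected registrations, xz and lzma files open through the usual extension inference. A minimal sketch (the file name is a placeholder):

```python
import fsspec

# compression="infer" picks the codec from the ".xz" suffix;
# requires the standard-library lzma module to be available.
with fsspec.open("logs/app.log.xz", "rt", compression="infer") as f:
    print(f.read()[:100])
```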
fsspec/core.py
CHANGED
@@ -1,8 +1,11 @@
+from __future__ import annotations
+
 import io
 import logging
 import os
 import re
 from glob import has_magic
+from pathlib import Path

 # for backwards compat, we export cache things from here too
 from .caching import (  # noqa: F401
@@ -290,7 +293,11 @@ def open_files(
         fs.auto_mkdir = auto_mkdir
     elif "r" not in mode and auto_mkdir:
         parents = {fs._parent(path) for path in paths}
-
+        for parent in parents:
+            try:
+                fs.makedirs(parent, exist_ok=True)
+            except PermissionError:
+                pass
     return OpenFiles(
         [
             OpenFile(
@@ -465,7 +472,11 @@ def open(
     return out[0]


-def open_local(url, mode="rb", **storage_options):
+def open_local(
+    url: str | list[str] | Path | list[Path],
+    mode: str = "rb",
+    **storage_options: dict,
+) -> str | list[str]:
     """Open file(s) which can be resolved to local

     For files which either are local, or get downloaded upon open
@@ -489,7 +500,7 @@ def open_local(url, mode="rb", **storage_options):
     )
     with of as files:
         paths = [f.name for f in files]
-    if isinstance(url, str) and not has_magic(url):
+    if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path):
         return paths[0]
     return paths

@@ -510,6 +521,8 @@ def split_protocol(urlpath):
     if len(protocol) > 1:
         # excludes Windows paths
         return protocol, path
+    if urlpath.startswith("data:"):
+        return urlpath.split(":", 1)
     return None, urlpath

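
For context (not part of the diff): `open_local` is now annotated to accept `pathlib.Path` objects and returns a single path for them, and `split_protocol` recognises `data:` URLs (which have no `://` separator). A minimal sketch (file names are placeholders):

```python
from pathlib import Path

import fsspec
from fsspec.core import split_protocol

# A single pathlib.Path now yields a single local path, not a list:
# local_path = fsspec.open_local(Path("example.csv"))

# "data:" URLs are special-cased because they lack "://".
protocol, remainder = split_protocol("data:text/plain;base64,aGVsbG8=")
print(protocol)  # "data"
```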
fsspec/exceptions.py
CHANGED
fsspec/generic.py
CHANGED
@@ -8,7 +8,7 @@ import uuid
 from typing import Optional

 from .asyn import AsyncFileSystem, _run_coros_in_chunks, sync_wrapper
-from .callbacks import _DEFAULT_CALLBACK
+from .callbacks import DEFAULT_CALLBACK
 from .core import filesystem, get_filesystem_class, split_protocol, url_to_fs

 _generic_fs = {}
@@ -171,6 +171,10 @@ class GenericFileSystem(AsyncFileSystem):
         self.method = default_method
         super().__init__(**kwargs)

+    def _parent(self, path):
+        fs = _resolve_fs(path, self.method)
+        return fs.unstrip_protocol(fs._parent(path))
+
     def _strip_protocol(self, path):
         # normalization only
         fs = _resolve_fs(path, self.method)
@@ -246,9 +250,12 @@ class GenericFileSystem(AsyncFileSystem):
         return fs.pipe_file(path, value, **kwargs)

     async def _rm(self, url, **kwargs):
-        fs = _resolve_fs(url, self.method)
+        urls = url
+        if isinstance(urls, str):
+            urls = [urls]
+        fs = _resolve_fs(urls[0], self.method)
         if fs.async_impl:
-            await fs._rm(
+            await fs._rm(urls, **kwargs)
         else:
             fs.rm(url, **kwargs)

@@ -272,7 +279,7 @@ class GenericFileSystem(AsyncFileSystem):
         url,
         url2,
         blocksize=2**20,
-        callback=_DEFAULT_CALLBACK,
+        callback=DEFAULT_CALLBACK,
         **kwargs,
     ):
         fs = _resolve_fs(url, self.method)
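
For context (not part of the diff): `GenericFileSystem._rm` now accepts either a single URL string or a list of URLs. A minimal sketch using the in-memory filesystem:

```python
import fsspec
from fsspec.generic import GenericFileSystem

mem = fsspec.filesystem("memory")
mem.pipe("/tmp/a", b"payload")

gfs = GenericFileSystem()
gfs.rm("memory:///tmp/a")                 # a bare string is now normalised to a list
# gfs.rm(["memory://x", "memory://y"])    # lists continue to work as before
```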
fsspec/gui.py
CHANGED
@@ -153,8 +153,9 @@ class SigSlot:
                     break
                 except Exception as e:
                     logger.exception(
-                        "Exception (%s) while executing callback for signal: %s"
-
+                        "Exception (%s) while executing callback for signal: %s",
+                        e,
+                        sig,
                     )

     def show(self, threads=False):
@@ -242,7 +243,7 @@ class FileSelector(SigSlot):
         else:
             self.init_protocol, url = "file", os.getcwd()
         self.init_url = url
-        self.init_kwargs = kwargs or "{}"
+        self.init_kwargs = (kwargs if isinstance(kwargs, str) else str(kwargs)) or "{}"
         self.filters = filters
         self.ignore = [re.compile(i) for i in ignore or []]
         self._fs = None
fsspec/implementations/arrow.py
CHANGED
@@ -5,6 +5,7 @@ import secrets
 import shutil
 from contextlib import suppress
 from functools import cached_property, wraps
+from urllib.parse import parse_qs

 from fsspec.spec import AbstractFileSystem
 from fsspec.utils import (
@@ -255,6 +256,7 @@ class HadoopFileSystem(ArrowFSWrapper):
         port=0,
         user=None,
         kerb_ticket=None,
+        replication=3,
         extra_conf=None,
         **kwargs,
     ):
@@ -270,6 +272,8 @@ class HadoopFileSystem(ArrowFSWrapper):
             If given, connect as this username
         kerb_ticket: str or None
             If given, use this ticket for authentication
+        replication: int
+            set replication factor of file for write operations. default value is 3.
         extra_conf: None or dict
             Passed on to HadoopFileSystem
         """
@@ -280,6 +284,7 @@ class HadoopFileSystem(ArrowFSWrapper):
             port=port,
             user=user,
             kerb_ticket=kerb_ticket,
+            replication=replication,
             extra_conf=extra_conf,
         )
         super().__init__(fs=fs, **kwargs)
@@ -294,4 +299,8 @@ class HadoopFileSystem(ArrowFSWrapper):
             out["user"] = ops["username"]
         if ops.get("port", None):
             out["port"] = ops["port"]
+        if ops.get("url_query", None):
+            queries = parse_qs(ops["url_query"])
+            if queries.get("replication", None):
+                out["replication"] = int(queries["replication"][0])
         return out
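
For context (not part of the diff): the HDFS replication factor can now be given either as a constructor argument or through the URL query string, which `_get_kwargs_from_urls` parses with `parse_qs`. A minimal sketch (host, port, and paths are placeholders; requires pyarrow):

```python
import fsspec

# Explicit keyword argument:
fs = fsspec.filesystem("hdfs", host="namenode", port=8020, replication=2)

# Or encoded in the URL; "?replication=2" is picked up from the query string:
with fsspec.open("hdfs://namenode:8020/tmp/out.bin?replication=2", "wb") as f:
    f.write(b"data")
```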
fsspec/implementations/cache_mapper.py
CHANGED
@@ -2,13 +2,9 @@ from __future__ import annotations

 import abc
 import hashlib
-from typing import TYPE_CHECKING

 from fsspec.implementations.local import make_path_posix

-if TYPE_CHECKING:
-    from typing import Any
-

 class AbstractCacheMapper(abc.ABC):
     """Abstract super-class for mappers from remote URLs to local cached
@@ -19,7 +15,7 @@ class AbstractCacheMapper(abc.ABC):
     def __call__(self, path: str) -> str:
         ...

-    def __eq__(self, other: Any) -> bool:
+    def __eq__(self, other: object) -> bool:
         # Identity only depends on class. When derived classes have attributes
         # they will need to be included.
         return isinstance(other, type(self))
@@ -56,7 +52,7 @@ class BasenameCacheMapper(AbstractCacheMapper):
         else:
             return prefix  # No separator found, simple filename

-    def __eq__(self, other: Any) -> bool:
+    def __eq__(self, other: object) -> bool:
         return super().__eq__(other) and self.directory_levels == other.directory_levels

     def __hash__(self) -> int: