fsspec 2023.12.2__py3-none-any.whl → 2024.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/_version.py +3 -3
- fsspec/asyn.py +9 -9
- fsspec/callbacks.py +98 -12
- fsspec/compression.py +3 -3
- fsspec/exceptions.py +0 -4
- fsspec/generic.py +2 -2
- fsspec/gui.py +3 -2
- fsspec/implementations/arrow.py +9 -0
- fsspec/implementations/cache_mapper.py +2 -6
- fsspec/implementations/cached.py +25 -7
- fsspec/implementations/dbfs.py +14 -4
- fsspec/implementations/dirfs.py +6 -0
- fsspec/implementations/ftp.py +18 -13
- fsspec/implementations/github.py +17 -5
- fsspec/implementations/http.py +14 -10
- fsspec/implementations/local.py +8 -4
- fsspec/implementations/memory.py +1 -1
- fsspec/implementations/reference.py +78 -40
- fsspec/implementations/sftp.py +1 -1
- fsspec/implementations/webhdfs.py +20 -1
- fsspec/parquet.py +3 -5
- fsspec/spec.py +15 -13
- fsspec/tests/abstract/copy.py +21 -7
- fsspec/tests/abstract/put.py +21 -7
- {fsspec-2023.12.2.dist-info → fsspec-2024.2.0.dist-info}/METADATA +1 -2
- fsspec-2024.2.0.dist-info/RECORD +54 -0
- fsspec-2023.12.2.dist-info/RECORD +0 -54
- {fsspec-2023.12.2.dist-info → fsspec-2024.2.0.dist-info}/LICENSE +0 -0
- {fsspec-2023.12.2.dist-info → fsspec-2024.2.0.dist-info}/WHEEL +0 -0
- {fsspec-2023.12.2.dist-info → fsspec-2024.2.0.dist-info}/top_level.txt +0 -0
fsspec/_version.py
CHANGED
@@ -8,11 +8,11 @@ import json

 version_json = '''
 {
- "date": "…
+ "date": "2024-02-04T20:21:42-0500",
  "dirty": false,
  "error": null,
- "full-revisionid": "…
- "version": "…
+ "full-revisionid": "5dc364e13b63609717d77b7361e80cfa64e3b8fd",
+ "version": "2024.2.0"
 }
 ''' # END VERSION_JSON

fsspec/asyn.py
CHANGED
@@ -11,7 +11,7 @@ from contextlib import contextmanager
 from glob import has_magic
 from typing import TYPE_CHECKING, Iterable

-from .callbacks import _DEFAULT_CALLBACK
+from .callbacks import DEFAULT_CALLBACK
 from .exceptions import FSTimeoutError
 from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep
 from .spec import AbstractBufferedFile, AbstractFileSystem
@@ -205,7 +205,7 @@ def running_async() -> bool:
 async def _run_coros_in_chunks(
     coros,
     batch_size=None,
-    callback=_DEFAULT_CALLBACK,
+    callback=DEFAULT_CALLBACK,
     timeout=None,
     return_exceptions=False,
     nofiles=False,
@@ -245,7 +245,7 @@ async def _run_coros_in_chunks(
             asyncio.Task(asyncio.wait_for(c, timeout=timeout))
             for c in coros[start : start + batch_size]
         ]
-        if callback is not _DEFAULT_CALLBACK:
+        if callback is not DEFAULT_CALLBACK:
            [
                t.add_done_callback(lambda *_, **__: callback.relative_update(1))
                for t in chunk
@@ -506,7 +506,7 @@ class AsyncFileSystem(AbstractFileSystem):
         lpath,
         rpath,
         recursive=False,
-        callback=_DEFAULT_CALLBACK,
+        callback=DEFAULT_CALLBACK,
         batch_size=None,
         maxdepth=None,
         **kwargs,
@@ -568,8 +568,8 @@ class AsyncFileSystem(AbstractFileSystem):
         coros = []
         callback.set_size(len(file_pairs))
         for lfile, rfile in file_pairs:
-            callback.…
-            coros.append(…
+            put_file = callback.branch_coro(self._put_file)
+            coros.append(put_file(lfile, rfile, **kwargs))

         return await _run_coros_in_chunks(
             coros, batch_size=batch_size, callback=callback
@@ -583,7 +583,7 @@ class AsyncFileSystem(AbstractFileSystem):
         rpath,
         lpath,
         recursive=False,
-        callback=_DEFAULT_CALLBACK,
+        callback=DEFAULT_CALLBACK,
         maxdepth=None,
         **kwargs,
     ):
@@ -645,8 +645,8 @@ class AsyncFileSystem(AbstractFileSystem):
         coros = []
         callback.set_size(len(lpaths))
         for lpath, rpath in zip(lpaths, rpaths):
-            callback.…
-            coros.append(…
+            get_file = callback.branch_coro(self._get_file)
+            coros.append(get_file(rpath, lpath, **kwargs))
         return await _run_coros_in_chunks(
             coros, batch_size=batch_size, callback=callback
         )
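The batched put/get now wraps each per-file coroutine with `callback.branch_coro`, so every individual transfer receives its own child callback from `branched`. A minimal sketch of that flow, assuming fsspec 2024.2.0 is installed; `VerboseCallback` and `fake_put_file` are illustrative names, not part of fsspec:

```python
import asyncio

from fsspec.callbacks import Callback


class VerboseCallback(Callback):
    """Parent callback that hands each child transfer a fresh Callback."""

    def branched(self, path_1, path_2, **kwargs):
        print(f"starting {path_1} -> {path_2}")
        return Callback()  # child callback used for that single transfer


async def fake_put_file(lpath, rpath, callback=None, **kwargs):
    # stand-in for AsyncFileSystem._put_file: reports progress on its child
    callback.set_size(3)
    for _ in range(3):
        callback.relative_update(1)


async def main():
    parent = VerboseCallback()
    put_file = parent.branch_coro(fake_put_file)  # new in 2024.2.0
    await asyncio.gather(
        put_file("a.bin", "remote/a.bin"),
        put_file("b.bin", "remote/b.bin"),
    )


asyncio.run(main())
```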
fsspec/callbacks.py
CHANGED
@@ -1,3 +1,6 @@
+from functools import wraps
+
+
 class Callback:
     """
     Base class and interface for callback mechanism
@@ -25,6 +28,60 @@ class Callback:
         self.hooks = hooks or {}
         self.kw = kwargs

+    def __enter__(self):
+        return self
+
+    def __exit__(self, *exc_args):
+        self.close()
+
+    def close(self):
+        """Close callback."""
+
+    def branched(self, path_1, path_2, **kwargs):
+        """
+        Return callback for child transfers
+
+        If this callback is operating at a higher level, e.g., put, which may
+        trigger transfers that can also be monitored. The function returns a callback
+        that has to be passed to the child method, e.g., put_file,
+        as `callback=` argument.
+
+        The implementation uses `callback.branch` for compatibility.
+        When implementing callbacks, it is recommended to override this function instead
+        of `branch` and avoid calling `super().branched(...)`.
+
+        Prefer using this function over `branch`.
+
+        Parameters
+        ----------
+        path_1: str
+            Child's source path
+        path_2: str
+            Child's destination path
+        **kwargs:
+            Arbitrary keyword arguments
+
+        Returns
+        -------
+        callback: Callback
+            A callback instance to be passed to the child method
+        """
+        self.branch(path_1, path_2, kwargs)
+        # mutate kwargs so that we can force the caller to pass "callback=" explicitly
+        return kwargs.pop("callback", DEFAULT_CALLBACK)
+
+    def branch_coro(self, fn):
+        """
+        Wraps a coroutine, and pass a new child callback to it.
+        """
+
+        @wraps(fn)
+        async def func(path1, path2: str, **kwargs):
+            with self.branched(path1, path2, **kwargs) as child:
+                return await fn(path1, path2, callback=child, **kwargs)
+
+        return func
+
     def set_size(self, size):
         """
         Set the internal maximum size attribute
@@ -140,10 +197,10 @@ class Callback:

         For the special value of ``None``, return the global instance of
         ``NoOpCallback``. This is an alternative to including
-        ``callback=_DEFAULT_CALLBACK`` directly in a method signature.
+        ``callback=DEFAULT_CALLBACK`` directly in a method signature.
         """
         if maybe_callback is None:
-            return _DEFAULT_CALLBACK
+            return DEFAULT_CALLBACK
         return maybe_callback


@@ -186,7 +243,9 @@ class TqdmCallback(Callback):
     tqdm_kwargs : dict, (optional)
         Any argument accepted by the tqdm constructor.
         See the `tqdm doc <https://tqdm.github.io/docs/tqdm/#__init__>`_.
-        Will be forwarded to …
+        Will be forwarded to `tqdm_cls`.
+    tqdm_cls: (optional)
+        subclass of `tqdm.tqdm`. If not passed, it will default to `tqdm.tqdm`.

     Examples
     --------
@@ -209,30 +268,57 @@ class TqdmCallback(Callback):
             recursive=True,
             callback=TqdmCallback(tqdm_kwargs={"desc": "Your tqdm description"}),
         )
+
+    You can also customize the progress bar by passing a subclass of `tqdm`.
+
+    .. code-block:: python
+
+        class TqdmFormat(tqdm):
+            '''Provides a `total_time` format parameter'''
+            @property
+            def format_dict(self):
+                d = super().format_dict
+                total_time = d["elapsed"] * (d["total"] or 0) / max(d["n"], 1)
+                d.update(total_time=self.format_interval(total_time) + " in total")
+                return d
+
+    >>> with TqdmCallback(
+            tqdm_kwargs={
+                "desc": "desc",
+                "bar_format": "{total_time}: {percentage:.0f}%|{bar}{r_bar}",
+            },
+            tqdm_cls=TqdmFormat,
+        ) as callback:
+            fs.upload(".", path2distant_data, recursive=True, callback=callback)
     """

     def __init__(self, tqdm_kwargs=None, *args, **kwargs):
         try:
-            import tqdm
+            from tqdm import tqdm

-            self._tqdm = tqdm
         except ImportError as exce:
             raise ImportError(
                 "Using TqdmCallback requires tqdm to be installed"
             ) from exce

+        self._tqdm_cls = kwargs.pop("tqdm_cls", tqdm)
         self._tqdm_kwargs = tqdm_kwargs or {}
+        self.tqdm = None
         super().__init__(*args, **kwargs)

-    def …
-        self.tqdm…
+    def call(self, *args, **kwargs):
+        if self.tqdm is None:
+            self.tqdm = self._tqdm_cls(total=self.size, **self._tqdm_kwargs)
+        self.tqdm.total = self.size
+        self.tqdm.update(self.value - self.tqdm.n)

-    def …
-        self.tqdm…
+    def close(self):
+        if self.tqdm is not None:
+            self.tqdm.close()
+            self.tqdm = None

     def __del__(self):
-        self.…
-        self.tqdm = None
+        return self.close()


-_DEFAULT_CALLBACK = NoOpCallback()
+DEFAULT_CALLBACK = _DEFAULT_CALLBACK = NoOpCallback()
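A short usage sketch of the reworked API, assuming fsspec 2024.2.0 with `tqdm` installed; the local paths are placeholders. It shows the `DEFAULT_CALLBACK` singleton returned by `as_callback(None)` and the new context-manager protocol, which lets `TqdmCallback` build its progress bar lazily and close it deterministically:

```python
import fsspec
from fsspec.callbacks import Callback, DEFAULT_CALLBACK, TqdmCallback

# None is mapped to the module-level no-op singleton
assert Callback.as_callback(None) is DEFAULT_CALLBACK

fs = fsspec.filesystem("file")
# Callbacks are now context managers: close() runs on exit, so the tqdm bar
# (created lazily on the first call()) is torn down when the copy finishes.
with TqdmCallback(tqdm_kwargs={"desc": "copying"}) as callback:
    fs.get("/tmp/src_dir", "/tmp/dst_dir", recursive=True, callback=callback)
```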
fsspec/compression.py
CHANGED
@@ -90,15 +90,15 @@ except ImportError:
 try:
     from lzma import LZMAFile

-    register_compression("lzma", LZMAFile, "…
-    register_compression("xz", LZMAFile, "xz"…
+    register_compression("lzma", LZMAFile, "lzma")
+    register_compression("xz", LZMAFile, "xz")
 except ImportError:
     pass

 try:
     import lzmaffi

-    register_compression("lzma", lzmaffi.LZMAFile, "…
+    register_compression("lzma", lzmaffi.LZMAFile, "lzma", force=True)
     register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
 except ImportError:
     pass
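After this change the `lzma` codec is registered with its own `lzma` filename extension, so both explicit `compression="lzma"` and extension inference on a `.lzma` file resolve to it. A small sketch, assuming fsspec 2024.2.0; the file name is a placeholder:

```python
import fsspec

# write an LZMA-compressed text file; "infer" maps the .lzma suffix to the codec
with fsspec.open("data.json.lzma", "wt", compression="infer") as f:
    f.write('{"hello": "world"}\n')

# read it back, naming the codec explicitly
with fsspec.open("data.json.lzma", "rt", compression="lzma") as f:
    print(f.read())
```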
fsspec/exceptions.py
CHANGED
fsspec/generic.py
CHANGED
@@ -8,7 +8,7 @@ import uuid
 from typing import Optional

 from .asyn import AsyncFileSystem, _run_coros_in_chunks, sync_wrapper
-from .callbacks import _DEFAULT_CALLBACK
+from .callbacks import DEFAULT_CALLBACK
 from .core import filesystem, get_filesystem_class, split_protocol, url_to_fs

 _generic_fs = {}
@@ -279,7 +279,7 @@ class GenericFileSystem(AsyncFileSystem):
         url,
         url2,
         blocksize=2**20,
-        callback=_DEFAULT_CALLBACK,
+        callback=DEFAULT_CALLBACK,
         **kwargs,
     ):
         fs = _resolve_fs(url, self.method)
fsspec/gui.py
CHANGED
@@ -153,8 +153,9 @@ class SigSlot:
                     break
                 except Exception as e:
                     logger.exception(
-                        "Exception (%s) while executing callback for signal: %s"
-                        …
+                        "Exception (%s) while executing callback for signal: %s",
+                        e,
+                        sig,
                     )

     def show(self, threads=False):
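The fix switches `logger.exception` to lazy `%`-style arguments, so string formatting only happens when the record is actually emitted. The general pattern, shown standalone with the standard library (the signal name here is made up):

```python
import logging

logger = logging.getLogger("fsspec.gui")

try:
    raise RuntimeError("boom")
except Exception as e:
    # arguments are passed separately; the logging framework formats them
    # only if the record is not filtered out
    logger.exception(
        "Exception (%s) while executing callback for signal: %s", e, "clicked"
    )
```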
fsspec/implementations/arrow.py
CHANGED
@@ -5,6 +5,7 @@ import secrets
 import shutil
 from contextlib import suppress
 from functools import cached_property, wraps
+from urllib.parse import parse_qs

 from fsspec.spec import AbstractFileSystem
 from fsspec.utils import (
@@ -255,6 +256,7 @@ class HadoopFileSystem(ArrowFSWrapper):
         port=0,
         user=None,
         kerb_ticket=None,
+        replication=3,
         extra_conf=None,
         **kwargs,
     ):
@@ -270,6 +272,8 @@ class HadoopFileSystem(ArrowFSWrapper):
             If given, connect as this username
         kerb_ticket: str or None
             If given, use this ticket for authentication
+        replication: int
+            set replication factor of file for write operations. default value is 3.
         extra_conf: None or dict
             Passed on to HadoopFileSystem
         """
@@ -280,6 +284,7 @@ class HadoopFileSystem(ArrowFSWrapper):
             port=port,
             user=user,
             kerb_ticket=kerb_ticket,
+            replication=replication,
             extra_conf=extra_conf,
         )
         super().__init__(fs=fs, **kwargs)
@@ -294,4 +299,8 @@ class HadoopFileSystem(ArrowFSWrapper):
             out["user"] = ops["username"]
         if ops.get("port", None):
             out["port"] = ops["port"]
+        if ops.get("url_query", None):
+            queries = parse_qs(ops["url_query"])
+            if queries.get("replication", None):
+                out["replication"] = int(queries["replication"][0])
         return out
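With the change above, the HDFS replication factor can be set either as a constructor argument or through a URL query parameter parsed by `_get_kwargs_from_urls`. A usage sketch, assuming pyarrow with a reachable HDFS cluster; host, port and paths are placeholders:

```python
import fsspec

# explicit keyword argument
fs = fsspec.filesystem("hdfs", host="namenode", port=8020, replication=2)
with fs.open("/tmp/example.txt", "wb") as f:
    f.write(b"hello")

# equivalent, via the URL query string
with fsspec.open("hdfs://namenode:8020/tmp/example2.txt?replication=2", "wb") as f:
    f.write(b"hello again")
```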
fsspec/implementations/cache_mapper.py
CHANGED

@@ -2,13 +2,9 @@ from __future__ import annotations

 import abc
 import hashlib
-from typing import TYPE_CHECKING

 from fsspec.implementations.local import make_path_posix

-if TYPE_CHECKING:
-    from typing import Any
-

 class AbstractCacheMapper(abc.ABC):
     """Abstract super-class for mappers from remote URLs to local cached
@@ -19,7 +15,7 @@ class AbstractCacheMapper(abc.ABC):
     def __call__(self, path: str) -> str:
         ...

-    def __eq__(self, other: Any) -> bool:
+    def __eq__(self, other: object) -> bool:
         # Identity only depends on class. When derived classes have attributes
         # they will need to be included.
         return isinstance(other, type(self))
@@ -56,7 +52,7 @@ class BasenameCacheMapper(AbstractCacheMapper):
         else:
             return prefix  # No separator found, simple filename

-    def __eq__(self, other: Any) -> bool:
+    def __eq__(self, other: object) -> bool:
         return super().__eq__(other) and self.directory_levels == other.directory_levels

     def __hash__(self) -> int:
fsspec/implementations/cached.py
CHANGED
@@ -10,7 +10,7 @@ from shutil import rmtree
 from typing import TYPE_CHECKING, Any, Callable, ClassVar

 from fsspec import AbstractFileSystem, filesystem
-from fsspec.callbacks import _DEFAULT_CALLBACK
+from fsspec.callbacks import DEFAULT_CALLBACK
 from fsspec.compression import compr
 from fsspec.core import BaseCache, MMapCache
 from fsspec.exceptions import BlocksizeMismatchError
@@ -524,7 +524,7 @@ class WholeFileCacheFileSystem(CachingFileSystem):
     protocol = "filecache"
     local_file = True

-    def open_many(self, open_files):
+    def open_many(self, open_files, **kwargs):
         paths = [of.path for of in open_files]
         if "r" in open_files.mode:
             self._mkcache()
@@ -535,6 +535,7 @@ class WholeFileCacheFileSystem(CachingFileSystem):
                 path,
                 mode=open_files.mode,
                 fn=os.path.join(self.storage[-1], self._mapper(path)),
+                **kwargs,
             )
             for path in paths
         ]
@@ -606,7 +607,7 @@ class WholeFileCacheFileSystem(CachingFileSystem):
         path,
         recursive=False,
         on_error="raise",
-        callback=_DEFAULT_CALLBACK,
+        callback=DEFAULT_CALLBACK,
         **kwargs,
     ):
         paths = self.expand_path(
@@ -650,7 +651,13 @@ class WholeFileCacheFileSystem(CachingFileSystem):
         path = self._strip_protocol(path)
         if "r" not in mode:
             fn = self._make_local_details(path)
-            …
+            user_specified_kwargs = {
+                k: v
+                for k, v in kwargs.items()
+                # those kwargs were added by open(), we don't want them
+                if k not in ["autocommit", "block_size", "cache_options"]
+            }
+            return LocalTempFile(self, path, mode=mode, fn=fn, **user_specified_kwargs)
         detail = self._check_file(path)
         if detail:
             detail, fn = detail
@@ -775,8 +782,18 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):

         if "r" not in mode:
             fn = os.path.join(self.storage[-1], sha)
+            user_specified_kwargs = {
+                k: v
+                for k, v in kwargs.items()
+                if k not in ["autocommit", "block_size", "cache_options"]
+            }  # those were added by open()
             return LocalTempFile(
-                self, …
+                self,
+                path,
+                mode=mode,
+                autocommit=not self._intrans,
+                fn=fn,
+                **user_specified_kwargs,
             )
         fn = self._check_file(path)
         if fn:
@@ -812,7 +829,7 @@ class SimpleCacheFileSystem(WholeFileCacheFileSystem):
 class LocalTempFile:
     """A temporary local file, which will be uploaded on commit"""

-    def __init__(self, fs, path, fn, mode="wb", autocommit=True, seek=0):
+    def __init__(self, fs, path, fn, mode="wb", autocommit=True, seek=0, **kwargs):
         self.fn = fn
         self.fh = open(fn, mode)
         self.mode = mode
@@ -822,6 +839,7 @@ class LocalTempFile:
         self.fs = fs
         self.closed = False
         self.autocommit = autocommit
+        self.kwargs = kwargs

     def __reduce__(self):
         # always open in r+b to allow continuing writing at a location
@@ -849,7 +867,7 @@ class LocalTempFile:
         os.remove(self.fn)

     def commit(self):
-        self.fs.put(self.fn, self.path)
+        self.fs.put(self.fn, self.path, **self.kwargs)
         try:
             os.remove(self.fn)
         except (PermissionError, FileNotFoundError):
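The net effect of the `LocalTempFile` changes: keyword arguments given to `open()` on a caching filesystem (other than the ones `open()` itself injects) are now stored on the temporary file and forwarded to the target filesystem's `put()` on commit. A sketch, assuming fsspec 2024.2.0 with `s3fs` available; the bucket name and the `ContentType` upload option are illustrative, not guaranteed parts of any API:

```python
import fsspec

fs = fsspec.filesystem("simplecache", target_protocol="s3")

# the extra kwarg is kept on the LocalTempFile and passed to s3.put(...) when
# the temporary file is committed on close
with fs.open("my-bucket/report.csv", "wb", ContentType="text/csv") as f:
    f.write(b"a,b\n1,2\n")
```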
fsspec/implementations/dbfs.py
CHANGED
@@ -2,6 +2,8 @@ import base64
 import urllib

 import requests
+import requests.exceptions
+from requests.adapters import HTTPAdapter, Retry

 from fsspec import AbstractFileSystem
 from fsspec.spec import AbstractBufferedFile
@@ -42,13 +44,19 @@ class DatabricksFileSystem(AbstractFileSystem):
         """
         self.instance = instance
         self.token = token
-
         self.session = requests.Session()
+        self.retries = Retry(
+            total=10,
+            backoff_factor=0.05,
+            status_forcelist=[408, 429, 500, 502, 503, 504],
+        )
+
+        self.session.mount("https://", HTTPAdapter(max_retries=self.retries))
         self.session.headers.update({"Authorization": f"Bearer {self.token}"})

         super().__init__(**kwargs)

-    def ls(self, path, detail=True):
+    def ls(self, path, detail=True, **kwargs):
         """
         List the contents of the given path.

@@ -137,7 +145,7 @@ class DatabricksFileSystem(AbstractFileSystem):

         self.mkdirs(path, **kwargs)

-    def rm(self, path, recursive=False):
+    def rm(self, path, recursive=False, **kwargs):
         """
         Remove the file or folder at the given absolute path.

@@ -166,7 +174,9 @@ class DatabricksFileSystem(AbstractFileSystem):
             raise e
         self.invalidate_cache(self._parent(path))

-    def mv(…
+    def mv(
+        self, source_path, destination_path, recursive=False, maxdepth=None, **kwargs
+    ):
         """
         Move a source to a destination path.

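Usage is unchanged; the session created in the constructor now retries transient HTTP errors automatically. A usage sketch, with a placeholder workspace host and token:

```python
import fsspec

fs = fsspec.filesystem(
    "dbfs",
    instance="adb-1234567890123456.7.azuredatabricks.net",  # placeholder
    token="dapiXXXXXXXXXXXXXXXX",  # placeholder
)
# requests failing with 408/429/5xx are retried up to 10 times with backoff
print(fs.ls("/", detail=False))
```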
fsspec/implementations/dirfs.py
CHANGED
@@ -124,6 +124,12 @@ class DirFileSystem(AsyncFileSystem):
     def pipe(self, path, *args, **kwargs):
         return self.fs.pipe(self._join(path), *args, **kwargs)

+    async def _pipe_file(self, path, *args, **kwargs):
+        return await self.fs._pipe_file(self._join(path), *args, **kwargs)
+
+    def pipe_file(self, path, *args, **kwargs):
+        return self.fs.pipe_file(self._join(path), *args, **kwargs)
+
     async def _cat_file(self, path, *args, **kwargs):
         return await self.fs._cat_file(self._join(path), *args, **kwargs)

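`pipe_file` (and its async variant) is now forwarded with the directory prefix applied, like the other delegated methods. A quick check against an in-memory filesystem, assuming fsspec 2024.2.0; the paths are arbitrary:

```python
import fsspec

base = fsspec.filesystem("memory")
base.makedirs("/project", exist_ok=True)
dirfs = fsspec.filesystem("dir", path="/project", fs=base)

# written through the prefix, readable on the wrapped filesystem
dirfs.pipe_file("hello.txt", b"hi there")
print(base.cat_file("/project/hello.txt"))  # b'hi there'
```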
fsspec/implementations/ftp.py
CHANGED
@@ -171,12 +171,15 @@ class FTPFileSystem(AbstractFileSystem):
         def cb(x):
             out.append(x)

-        self.ftp.retrbinary(
-            f"RETR {path}",
-            blocksize=self.blocksize,
-            rest=start,
-            callback=cb,
-        )
+        try:
+            self.ftp.retrbinary(
+                f"RETR {path}",
+                blocksize=self.blocksize,
+                rest=start,
+                callback=cb,
+            )
+        except (Error, error_perm) as orig_exc:
+            raise FileNotFoundError(path) from orig_exc
         return b"".join(out)

     def _open(
@@ -361,15 +364,17 @@ def _mlsd2(ftp, path="."):
     minfo = []
     ftp.dir(path, lines.append)
     for line in lines:
-        …
+        split_line = line.split()
+        if len(split_line) < 9:
+            continue
         this = (
-            …
+            split_line[-1],
             {
-                "modify": " ".join(…
-                "unix.owner": …
-                "unix.group": …
-                "unix.mode": …
-                "size": …
+                "modify": " ".join(split_line[5:8]),
+                "unix.owner": split_line[2],
+                "unix.group": split_line[3],
+                "unix.mode": split_line[0],
+                "size": split_line[4],
             },
         )
         if "d" == this[1]["unix.mode"][0]:
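The `_mlsd2` fallback now splits each `LIST` line and skips anything with fewer than nine fields instead of raising on short lines. The parsing it applies, shown standalone on a made-up Unix-style listing line:

```python
# sample output line from an FTP `LIST` command (made up for illustration)
line = "drwxr-xr-x   2 owner group     4096 Jan 01 12:00 pub"

split_line = line.split()
if len(split_line) >= 9:
    entry = (
        split_line[-1],  # name
        {
            "modify": " ".join(split_line[5:8]),
            "unix.owner": split_line[2],
            "unix.group": split_line[3],
            "unix.mode": split_line[0],
            "size": split_line[4],
        },
    )
    print(entry)
```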
fsspec/implementations/github.py
CHANGED
@@ -36,8 +36,11 @@ class GithubFileSystem(AbstractFileSystem):
     url = "https://api.github.com/repos/{org}/{repo}/git/trees/{sha}"
     rurl = "https://raw.githubusercontent.com/{org}/{repo}/{sha}/{path}"
     protocol = "github"
+    timeout = (60, 60)  # connect, read timeouts

-    def __init__(…
+    def __init__(
+        self, org, repo, sha=None, username=None, token=None, timeout=None, **kwargs
+    ):
         super().__init__(**kwargs)
         self.org = org
         self.repo = repo
@@ -45,10 +48,14 @@ class GithubFileSystem(AbstractFileSystem):
             raise ValueError("Auth required both username and token")
         self.username = username
         self.token = token
+        if timeout is not None:
+            self.timeout = timeout
         if sha is None:
             # look up default branch (not necessarily "master")
             u = "https://api.github.com/repos/{org}/{repo}"
-            r = requests.get(u.format(org=org, repo=repo), **self.kw)
+            r = requests.get(
+                u.format(org=org, repo=repo), timeout=self.timeout, **self.kw
+            )
             r.raise_for_status()
             sha = r.json()["default_branch"]

@@ -79,7 +86,8 @@ class GithubFileSystem(AbstractFileSystem):
         List of string
         """
         r = requests.get(
-            f"https://api.github.com/{['users', 'orgs'][is_org]}/{org_or_user}/repos"
+            f"https://api.github.com/{['users', 'orgs'][is_org]}/{org_or_user}/repos",
+            timeout=cls.timeout,
         )
         r.raise_for_status()
         return [repo["name"] for repo in r.json()]
@@ -89,6 +97,7 @@ class GithubFileSystem(AbstractFileSystem):
         """Names of tags in the repo"""
         r = requests.get(
             f"https://api.github.com/repos/{self.org}/{self.repo}/tags",
+            timeout=self.timeout,
             **self.kw,
         )
         r.raise_for_status()
@@ -99,6 +108,7 @@ class GithubFileSystem(AbstractFileSystem):
         """Names of branches in the repo"""
         r = requests.get(
             f"https://api.github.com/repos/{self.org}/{self.repo}/branches",
+            timeout=self.timeout,
             **self.kw,
         )
         r.raise_for_status()
@@ -147,7 +157,9 @@ class GithubFileSystem(AbstractFileSystem):
         _sha = out["sha"]
         if path not in self.dircache or sha not in [self.root, None]:
             r = requests.get(
-                self.url.format(org=self.org, repo=self.repo, sha=_sha),
+                self.url.format(org=self.org, repo=self.repo, sha=_sha),
+                timeout=self.timeout,
+                **self.kw,
             )
             if r.status_code == 404:
                 raise FileNotFoundError(path)
@@ -208,7 +220,7 @@ class GithubFileSystem(AbstractFileSystem):
         url = self.rurl.format(
             org=self.org, repo=self.repo, path=path, sha=sha or self.root
         )
-        r = requests.get(url, **self.kw)
+        r = requests.get(url, timeout=self.timeout, **self.kw)
         if r.status_code == 404:
             raise FileNotFoundError(path)
         r.raise_for_status()
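All GitHub API calls now pass a `(connect, read)` timeout, defaulting to 60 seconds each and overridable per instance. A usage sketch, assuming fsspec 2024.2.0 and network access to the public GitHub API:

```python
import fsspec

fs = fsspec.filesystem(
    "github", org="fsspec", repo="filesystem_spec", timeout=(10, 30)
)
print(fs.ls(""))  # listing of the default branch's root tree
```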