fsspec 2023.9.2__py3-none-any.whl → 2023.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/__init__.py +6 -1
- fsspec/_version.py +4 -4
- fsspec/archive.py +1 -1
- fsspec/asyn.py +35 -45
- fsspec/caching.py +161 -90
- fsspec/compression.py +2 -4
- fsspec/core.py +19 -6
- fsspec/fuse.py +2 -2
- fsspec/generic.py +5 -1
- fsspec/gui.py +4 -4
- fsspec/implementations/cached.py +105 -25
- fsspec/implementations/data.py +48 -0
- fsspec/implementations/ftp.py +6 -6
- fsspec/implementations/git.py +3 -3
- fsspec/implementations/github.py +3 -7
- fsspec/implementations/http.py +34 -47
- fsspec/implementations/jupyter.py +5 -5
- fsspec/implementations/libarchive.py +1 -2
- fsspec/implementations/local.py +8 -4
- fsspec/implementations/memory.py +1 -1
- fsspec/implementations/reference.py +67 -25
- fsspec/implementations/sftp.py +11 -11
- fsspec/implementations/smb.py +4 -5
- fsspec/implementations/webhdfs.py +28 -8
- fsspec/implementations/zip.py +2 -2
- fsspec/mapping.py +2 -2
- fsspec/registry.py +8 -6
- fsspec/spec.py +41 -55
- fsspec/tests/abstract/common.py +5 -5
- fsspec/transaction.py +8 -4
- fsspec/utils.py +204 -37
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/METADATA +7 -6
- fsspec-2023.12.0.dist-info/RECORD +54 -0
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/WHEEL +1 -1
- fsspec/implementations/http_sync.py +0 -882
- fsspec-2023.9.2.dist-info/RECORD +0 -54
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/LICENSE +0 -0
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/top_level.txt +0 -0
|
@@ -40,7 +40,7 @@ class JupyterFileSystem(fsspec.AbstractFileSystem):
|
|
|
40
40
|
|
|
41
41
|
def ls(self, path, detail=True, **kwargs):
|
|
42
42
|
path = self._strip_protocol(path)
|
|
43
|
-
r = self.session.get(self.url
|
|
43
|
+
r = self.session.get(f"{self.url}/{path}")
|
|
44
44
|
if r.status_code == 404:
|
|
45
45
|
return FileNotFoundError(path)
|
|
46
46
|
r.raise_for_status()
|
|
@@ -61,7 +61,7 @@ class JupyterFileSystem(fsspec.AbstractFileSystem):
|
|
|
61
61
|
|
|
62
62
|
def cat_file(self, path, start=None, end=None, **kwargs):
|
|
63
63
|
path = self._strip_protocol(path)
|
|
64
|
-
r = self.session.get(self.url
|
|
64
|
+
r = self.session.get(f"{self.url}/{path}")
|
|
65
65
|
if r.status_code == 404:
|
|
66
66
|
return FileNotFoundError(path)
|
|
67
67
|
r.raise_for_status()
|
|
@@ -83,7 +83,7 @@ class JupyterFileSystem(fsspec.AbstractFileSystem):
|
|
|
83
83
|
"format": "base64",
|
|
84
84
|
"type": "file",
|
|
85
85
|
}
|
|
86
|
-
self.session.put(self.url
|
|
86
|
+
self.session.put(f"{self.url}/{path}", json=json)
|
|
87
87
|
|
|
88
88
|
def mkdir(self, path, create_parents=True, **kwargs):
|
|
89
89
|
path = self._strip_protocol(path)
|
|
@@ -96,11 +96,11 @@ class JupyterFileSystem(fsspec.AbstractFileSystem):
|
|
|
96
96
|
"content": None,
|
|
97
97
|
"type": "directory",
|
|
98
98
|
}
|
|
99
|
-
self.session.put(self.url
|
|
99
|
+
self.session.put(f"{self.url}/{path}", json=json)
|
|
100
100
|
|
|
101
101
|
def _rm(self, path):
|
|
102
102
|
path = self._strip_protocol(path)
|
|
103
|
-
self.session.delete(self.url
|
|
103
|
+
self.session.delete(f"{self.url}/{path}")
|
|
104
104
|
|
|
105
105
|
def _open(self, path, mode="rb", **kwargs):
|
|
106
106
|
path = self._strip_protocol(path)
|
|
@@ -122,8 +122,7 @@ class LibArchiveFileSystem(AbstractArchiveFileSystem):
|
|
|
122
122
|
files = open_files(fo, protocol=target_protocol, **(target_options or {}))
|
|
123
123
|
if len(files) != 1:
|
|
124
124
|
raise ValueError(
|
|
125
|
-
'Path "{}" did not resolve to exactly'
|
|
126
|
-
'one file: "{}"'.format(fo, files)
|
|
125
|
+
f'Path "{fo}" did not resolve to exactly one file: "{files}"'
|
|
127
126
|
)
|
|
128
127
|
fo = files[0]
|
|
129
128
|
self.of = fo
|
fsspec/implementations/local.py
CHANGED
|
@@ -29,7 +29,7 @@ class LocalFileSystem(AbstractFileSystem):
|
|
|
29
29
|
"""
|
|
30
30
|
|
|
31
31
|
root_marker = "/"
|
|
32
|
-
protocol = "file"
|
|
32
|
+
protocol = "file", "local"
|
|
33
33
|
local_file = True
|
|
34
34
|
|
|
35
35
|
def __init__(self, auto_mkdir=False, **kwargs):
|
|
@@ -98,7 +98,7 @@ class LocalFileSystem(AbstractFileSystem):
|
|
|
98
98
|
"islink": link,
|
|
99
99
|
}
|
|
100
100
|
for field in ["mode", "uid", "gid", "mtime", "ino", "nlink"]:
|
|
101
|
-
result[field] = getattr(out, "st_"
|
|
101
|
+
result[field] = getattr(out, f"st_{field}")
|
|
102
102
|
if result["islink"]:
|
|
103
103
|
result["destination"] = os.readlink(path)
|
|
104
104
|
try:
|
|
@@ -215,6 +215,10 @@ class LocalFileSystem(AbstractFileSystem):
|
|
|
215
215
|
path = path[7:]
|
|
216
216
|
elif path.startswith("file:"):
|
|
217
217
|
path = path[5:]
|
|
218
|
+
elif path.startswith("local://"):
|
|
219
|
+
path = path[8:]
|
|
220
|
+
elif path.startswith("local:"):
|
|
221
|
+
path = path[6:]
|
|
218
222
|
return make_path_posix(path).rstrip("/") or cls.root_marker
|
|
219
223
|
|
|
220
224
|
def _isfilestore(self):
|
|
@@ -240,7 +244,7 @@ def make_path_posix(path, sep=os.sep):
|
|
|
240
244
|
return path
|
|
241
245
|
if path.startswith("./"):
|
|
242
246
|
path = path[2:]
|
|
243
|
-
return os.getcwd()
|
|
247
|
+
return f"{os.getcwd()}/{path}"
|
|
244
248
|
if (
|
|
245
249
|
(sep not in path and "/" not in path)
|
|
246
250
|
or (sep == "/" and not path.startswith("/"))
|
|
@@ -251,7 +255,7 @@ def make_path_posix(path, sep=os.sep):
|
|
|
251
255
|
# abspath made some more '\\' separators
|
|
252
256
|
return make_path_posix(osp.abspath(path))
|
|
253
257
|
else:
|
|
254
|
-
return os.getcwd()
|
|
258
|
+
return f"{os.getcwd()}/{path}"
|
|
255
259
|
if path.startswith("file://"):
|
|
256
260
|
path = path[7:]
|
|
257
261
|
if re.match("/[A-Za-z]:", path):
|
fsspec/implementations/memory.py
CHANGED
|
@@ -175,7 +175,7 @@ class MemoryFileSystem(AbstractFileSystem):
|
|
|
175
175
|
parent = self._parent(parent)
|
|
176
176
|
if self.isfile(parent):
|
|
177
177
|
raise FileExistsError(parent)
|
|
178
|
-
if mode in ["rb", "ab", "
|
|
178
|
+
if mode in ["rb", "ab", "r+b"]:
|
|
179
179
|
if path in self.store:
|
|
180
180
|
f = self.store[path]
|
|
181
181
|
if mode == "ab":
|
|
@@ -82,8 +82,12 @@ def ravel_multi_index(idx, sizes):
|
|
|
82
82
|
|
|
83
83
|
|
|
84
84
|
class LazyReferenceMapper(collections.abc.MutableMapping):
|
|
85
|
-
"""
|
|
86
|
-
references
|
|
85
|
+
"""This interface can be used to read/write references from Parquet stores.
|
|
86
|
+
It is not intended for other types of references.
|
|
87
|
+
It can be used with Kerchunk's MultiZarrToZarr method to combine
|
|
88
|
+
references into a parquet store.
|
|
89
|
+
Examples of this use-case can be found here:
|
|
90
|
+
https://fsspec.github.io/kerchunk/advanced.html?highlight=parquet#parquet-storage"""
|
|
87
91
|
|
|
88
92
|
# import is class level to prevent numpy dep requirement for fsspec
|
|
89
93
|
@property
|
|
@@ -108,17 +112,24 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
|
|
|
108
112
|
Root of parquet store
|
|
109
113
|
fs : fsspec.AbstractFileSystem
|
|
110
114
|
fsspec filesystem object, default is local filesystem.
|
|
111
|
-
cache_size : int
|
|
115
|
+
cache_size : int, default=128
|
|
112
116
|
Maximum size of LRU cache, where cache_size*record_size denotes
|
|
113
117
|
the total number of references that can be loaded in memory at once.
|
|
118
|
+
categorical_threshold : int
|
|
119
|
+
Encode urls as pandas.Categorical to reduce memory footprint if the ratio
|
|
120
|
+
of the number of unique urls to total number of refs for each variable
|
|
121
|
+
is greater than or equal to this number. (default 10)
|
|
122
|
+
|
|
123
|
+
|
|
114
124
|
"""
|
|
115
125
|
self.root = root
|
|
116
126
|
self.chunk_sizes = {}
|
|
117
127
|
self._items = {}
|
|
118
128
|
self.dirs = None
|
|
119
129
|
self.fs = fsspec.filesystem("file") if fs is None else fs
|
|
120
|
-
|
|
121
|
-
self.
|
|
130
|
+
self._items[".zmetadata"] = self.fs.cat_file(
|
|
131
|
+
"/".join([self.root, ".zmetadata"])
|
|
132
|
+
)
|
|
122
133
|
met = json.loads(self._items[".zmetadata"])
|
|
123
134
|
self.record_size = met["record_size"]
|
|
124
135
|
self.zmetadata = met["metadata"]
|
|
@@ -131,18 +142,37 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
|
|
|
131
142
|
def open_refs(field, record):
|
|
132
143
|
"""cached parquet file loader"""
|
|
133
144
|
path = self.url.format(field=field, record=record)
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
# better here?
|
|
137
|
-
df = self.pd.read_parquet(f, engine="fastparquet")
|
|
145
|
+
data = io.BytesIO(self.fs.cat_file(path))
|
|
146
|
+
df = self.pd.read_parquet(data, engine="fastparquet")
|
|
138
147
|
refs = {c: df[c].values for c in df.columns}
|
|
139
148
|
return refs
|
|
140
149
|
|
|
141
150
|
self.open_refs = open_refs
|
|
142
151
|
|
|
143
152
|
@staticmethod
|
|
144
|
-
def create(
|
|
153
|
+
def create(root, storage_options=None, fs=None, record_size=10000, **kwargs):
|
|
154
|
+
"""Make empty parquet reference set
|
|
155
|
+
|
|
156
|
+
Parameters
|
|
157
|
+
----------
|
|
158
|
+
root: str
|
|
159
|
+
Directory to contain the output; will be created
|
|
160
|
+
storage_options: dict | None
|
|
161
|
+
For making the filesystem to use for writing if fs is None
|
|
162
|
+
fs: FileSystem | None
|
|
163
|
+
Filesystem for writing
|
|
164
|
+
record_size: int
|
|
165
|
+
Number of references per parquet file
|
|
166
|
+
kwargs: passed to __init__
|
|
167
|
+
|
|
168
|
+
Returns
|
|
169
|
+
-------
|
|
170
|
+
LazyReferenceMapper instance
|
|
171
|
+
"""
|
|
145
172
|
met = {"metadata": {}, "record_size": record_size}
|
|
173
|
+
if fs is None:
|
|
174
|
+
fs, root = fsspec.core.url_to_fs(root, **(storage_options or {}))
|
|
175
|
+
fs.makedirs(root, exist_ok=True)
|
|
146
176
|
fs.pipe("/".join([root, ".zmetadata"]), json.dumps(met).encode())
|
|
147
177
|
return LazyReferenceMapper(root, fs, **kwargs)
|
|
148
178
|
|
|
@@ -283,7 +313,7 @@ class LazyReferenceMapper(collections.abc.MutableMapping):
|
|
|
283
313
|
def _generate_record(self, field, record):
|
|
284
314
|
"""The references for a given parquet file of a given field"""
|
|
285
315
|
refs = self.open_refs(field, record)
|
|
286
|
-
it = iter(zip(refs.values()))
|
|
316
|
+
it = iter(zip(*refs.values()))
|
|
287
317
|
if len(refs) == 3:
|
|
288
318
|
# All urls
|
|
289
319
|
return (list(t) for t in it)
|
|
@@ -594,7 +624,7 @@ class ReferenceFileSystem(AsyncFileSystem):
|
|
|
594
624
|
**(ref_storage_args or target_options or {}), protocol=target_protocol
|
|
595
625
|
)
|
|
596
626
|
ref_fs, fo2 = fsspec.core.url_to_fs(fo, **dic)
|
|
597
|
-
if ref_fs.isfile(
|
|
627
|
+
if ref_fs.isfile(fo2):
|
|
598
628
|
# text JSON
|
|
599
629
|
with fsspec.open(fo, "rb", **dic) as f:
|
|
600
630
|
logger.info("Read reference from URL %s", fo)
|
|
@@ -641,6 +671,7 @@ class ReferenceFileSystem(AsyncFileSystem):
|
|
|
641
671
|
self.fss[protocol] = fs
|
|
642
672
|
if remote_protocol is None:
|
|
643
673
|
# get single protocol from references
|
|
674
|
+
# TODO: warning here, since this can be very expensive?
|
|
644
675
|
for ref in self.references.values():
|
|
645
676
|
if callable(ref):
|
|
646
677
|
ref = ref()
|
|
@@ -763,24 +794,27 @@ class ReferenceFileSystem(AsyncFileSystem):
|
|
|
763
794
|
raise NotImplementedError
|
|
764
795
|
if isinstance(path, list) and (recursive or any("*" in p for p in path)):
|
|
765
796
|
raise NotImplementedError
|
|
797
|
+
# TODO: if references is lazy, pre-fetch all paths in batch before access
|
|
766
798
|
proto_dict = _protocol_groups(path, self.references)
|
|
767
799
|
out = {}
|
|
768
800
|
for proto, paths in proto_dict.items():
|
|
769
801
|
fs = self.fss[proto]
|
|
770
|
-
urls, starts, ends = [], [], []
|
|
802
|
+
urls, starts, ends, valid_paths = [], [], [], []
|
|
771
803
|
for p in paths:
|
|
772
804
|
# find references or label not-found. Early exit if any not
|
|
773
805
|
# found and on_error is "raise"
|
|
774
806
|
try:
|
|
775
807
|
u, s, e = self._cat_common(p)
|
|
776
|
-
urls.append(u)
|
|
777
|
-
starts.append(s)
|
|
778
|
-
ends.append(e)
|
|
779
808
|
except FileNotFoundError as err:
|
|
780
809
|
if on_error == "raise":
|
|
781
810
|
raise
|
|
782
811
|
if on_error != "omit":
|
|
783
812
|
out[p] = err
|
|
813
|
+
else:
|
|
814
|
+
urls.append(u)
|
|
815
|
+
starts.append(s)
|
|
816
|
+
ends.append(e)
|
|
817
|
+
valid_paths.append(p)
|
|
784
818
|
|
|
785
819
|
# process references into form for merging
|
|
786
820
|
urls2 = []
|
|
@@ -788,7 +822,7 @@ class ReferenceFileSystem(AsyncFileSystem):
|
|
|
788
822
|
ends2 = []
|
|
789
823
|
paths2 = []
|
|
790
824
|
whole_files = set()
|
|
791
|
-
for u, s, e, p in zip(urls, starts, ends,
|
|
825
|
+
for u, s, e, p in zip(urls, starts, ends, valid_paths):
|
|
792
826
|
if isinstance(u, bytes):
|
|
793
827
|
# data
|
|
794
828
|
out[p] = u
|
|
@@ -800,7 +834,7 @@ class ReferenceFileSystem(AsyncFileSystem):
|
|
|
800
834
|
starts2.append(s)
|
|
801
835
|
ends2.append(e)
|
|
802
836
|
paths2.append(p)
|
|
803
|
-
for u, s, e, p in zip(urls, starts, ends,
|
|
837
|
+
for u, s, e, p in zip(urls, starts, ends, valid_paths):
|
|
804
838
|
# second run to account for files that are to be loaded whole
|
|
805
839
|
if s is not None and u not in whole_files:
|
|
806
840
|
urls2.append(u)
|
|
@@ -820,7 +854,7 @@ class ReferenceFileSystem(AsyncFileSystem):
|
|
|
820
854
|
bytes_out = fs.cat_ranges(new_paths, new_starts, new_ends)
|
|
821
855
|
|
|
822
856
|
# unbundle from merged bytes - simple approach
|
|
823
|
-
for u, s, e, p in zip(urls, starts, ends,
|
|
857
|
+
for u, s, e, p in zip(urls, starts, ends, valid_paths):
|
|
824
858
|
if p in out:
|
|
825
859
|
continue # was bytes, already handled
|
|
826
860
|
for np, ns, ne, b in zip(new_paths, new_starts, new_ends, bytes_out):
|
|
@@ -954,16 +988,24 @@ class ReferenceFileSystem(AsyncFileSystem):
|
|
|
954
988
|
elif len(part) == 1:
|
|
955
989
|
size = None
|
|
956
990
|
else:
|
|
957
|
-
_,
|
|
991
|
+
_, _, size = part
|
|
958
992
|
par = path.rsplit("/", 1)[0] if "/" in path else ""
|
|
959
993
|
par0 = par
|
|
994
|
+
subdirs = [par0]
|
|
960
995
|
while par0 and par0 not in self.dircache:
|
|
961
|
-
#
|
|
962
|
-
self.dircache[par0] = []
|
|
963
|
-
self.dircache.setdefault(
|
|
964
|
-
par0.rsplit("/", 1)[0] if "/" in par0 else "", []
|
|
965
|
-
).append({"name": par0, "type": "directory", "size": 0})
|
|
996
|
+
# collect parent directories
|
|
966
997
|
par0 = self._parent(par0)
|
|
998
|
+
subdirs.append(par0)
|
|
999
|
+
|
|
1000
|
+
subdirs = subdirs[::-1]
|
|
1001
|
+
for parent, child in zip(subdirs, subdirs[1:]):
|
|
1002
|
+
# register newly discovered directories
|
|
1003
|
+
assert child not in self.dircache
|
|
1004
|
+
assert parent in self.dircache
|
|
1005
|
+
self.dircache[parent].append(
|
|
1006
|
+
{"name": child, "type": "directory", "size": 0}
|
|
1007
|
+
)
|
|
1008
|
+
self.dircache[child] = []
|
|
967
1009
|
|
|
968
1010
|
self.dircache[par].append({"name": path, "type": "file", "size": size})
|
|
969
1011
|
|
fsspec/implementations/sftp.py
CHANGED
|
@@ -41,14 +41,14 @@ class SFTPFileSystem(AbstractFileSystem):
|
|
|
41
41
|
"""
|
|
42
42
|
if self._cached:
|
|
43
43
|
return
|
|
44
|
-
super(
|
|
44
|
+
super().__init__(**ssh_kwargs)
|
|
45
45
|
self.temppath = ssh_kwargs.pop("temppath", "/tmp") # remote temp directory
|
|
46
46
|
self.host = host
|
|
47
47
|
self.ssh_kwargs = ssh_kwargs
|
|
48
48
|
self._connect()
|
|
49
49
|
|
|
50
50
|
def _connect(self):
|
|
51
|
-
logger.debug("Connecting to SFTP server %s"
|
|
51
|
+
logger.debug("Connecting to SFTP server %s", self.host)
|
|
52
52
|
self.client = paramiko.SSHClient()
|
|
53
53
|
self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
|
|
54
54
|
self.client.connect(self.host, **self.ssh_kwargs)
|
|
@@ -66,9 +66,9 @@ class SFTPFileSystem(AbstractFileSystem):
|
|
|
66
66
|
return out
|
|
67
67
|
|
|
68
68
|
def mkdir(self, path, create_parents=False, mode=511):
|
|
69
|
-
logger.debug("Creating folder %s"
|
|
69
|
+
logger.debug("Creating folder %s", path)
|
|
70
70
|
if self.exists(path):
|
|
71
|
-
raise FileExistsError("File exists: {}"
|
|
71
|
+
raise FileExistsError(f"File exists: {path}")
|
|
72
72
|
|
|
73
73
|
if create_parents:
|
|
74
74
|
self.makedirs(path)
|
|
@@ -77,18 +77,18 @@ class SFTPFileSystem(AbstractFileSystem):
|
|
|
77
77
|
|
|
78
78
|
def makedirs(self, path, exist_ok=False, mode=511):
|
|
79
79
|
if self.exists(path) and not exist_ok:
|
|
80
|
-
raise FileExistsError("File exists: {}"
|
|
80
|
+
raise FileExistsError(f"File exists: {path}")
|
|
81
81
|
|
|
82
82
|
parts = path.split("/")
|
|
83
83
|
path = ""
|
|
84
84
|
|
|
85
85
|
for part in parts:
|
|
86
|
-
path += "/"
|
|
86
|
+
path += f"/{part}"
|
|
87
87
|
if not self.exists(path):
|
|
88
88
|
self.ftp.mkdir(path, mode)
|
|
89
89
|
|
|
90
90
|
def rmdir(self, path):
|
|
91
|
-
logger.debug("Removing folder %s"
|
|
91
|
+
logger.debug("Removing folder %s", path)
|
|
92
92
|
self.ftp.rmdir(path)
|
|
93
93
|
|
|
94
94
|
def info(self, path):
|
|
@@ -122,7 +122,7 @@ class SFTPFileSystem(AbstractFileSystem):
|
|
|
122
122
|
return out
|
|
123
123
|
|
|
124
124
|
def ls(self, path, detail=False):
|
|
125
|
-
logger.debug("Listing folder %s"
|
|
125
|
+
logger.debug("Listing folder %s", path)
|
|
126
126
|
stats = [self._decode_stat(stat, path) for stat in self.ftp.listdir_iter(path)]
|
|
127
127
|
if detail:
|
|
128
128
|
return stats
|
|
@@ -131,7 +131,7 @@ class SFTPFileSystem(AbstractFileSystem):
|
|
|
131
131
|
return sorted(paths)
|
|
132
132
|
|
|
133
133
|
def put(self, lpath, rpath, callback=None, **kwargs):
|
|
134
|
-
logger.debug("Put file %s into %s"
|
|
134
|
+
logger.debug("Put file %s into %s", lpath, rpath)
|
|
135
135
|
self.ftp.put(lpath, rpath)
|
|
136
136
|
|
|
137
137
|
def get_file(self, rpath, lpath, **kwargs):
|
|
@@ -146,7 +146,7 @@ class SFTPFileSystem(AbstractFileSystem):
|
|
|
146
146
|
If 0, no buffering, if 1, line buffering, if >1, buffer that many
|
|
147
147
|
bytes, if None use default from paramiko.
|
|
148
148
|
"""
|
|
149
|
-
logger.debug("Opening file %s"
|
|
149
|
+
logger.debug("Opening file %s", path)
|
|
150
150
|
if kwargs.get("autocommit", True) is False:
|
|
151
151
|
# writes to temporary file, move on commit
|
|
152
152
|
path2 = "/".join([self.temppath, str(uuid.uuid4())])
|
|
@@ -167,7 +167,7 @@ class SFTPFileSystem(AbstractFileSystem):
|
|
|
167
167
|
self.ftp.remove(path)
|
|
168
168
|
|
|
169
169
|
def mv(self, old, new):
|
|
170
|
-
logger.debug("Renaming %s into %s"
|
|
170
|
+
logger.debug("Renaming %s into %s", old, new)
|
|
171
171
|
self.ftp.posix_rename(old, new)
|
|
172
172
|
|
|
173
173
|
|
fsspec/implementations/smb.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
1
|
"""
|
|
3
2
|
This module contains SMBFileSystem class responsible for handling access to
|
|
4
3
|
Windows Samba network shares by using package smbprotocol
|
|
@@ -103,7 +102,7 @@ class SMBFileSystem(AbstractFileSystem):
|
|
|
103
102
|
- 'w': Allow other handles to be opened with write access.
|
|
104
103
|
- 'd': Allow other handles to be opened with delete access.
|
|
105
104
|
"""
|
|
106
|
-
super(
|
|
105
|
+
super().__init__(**kwargs)
|
|
107
106
|
self.host = host
|
|
108
107
|
self.port = port
|
|
109
108
|
self.username = username
|
|
@@ -248,7 +247,7 @@ class SMBFileSystem(AbstractFileSystem):
|
|
|
248
247
|
else:
|
|
249
248
|
smbclient.remove(wpath, port=self._port)
|
|
250
249
|
|
|
251
|
-
def mv(self, path1, path2, **kwargs):
|
|
250
|
+
def mv(self, path1, path2, recursive=None, maxdepth=None, **kwargs):
|
|
252
251
|
wpath1 = _as_unc_path(self.host, path1)
|
|
253
252
|
wpath2 = _as_unc_path(self.host, path2)
|
|
254
253
|
smbclient.rename(wpath1, wpath2, port=self._port, **kwargs)
|
|
@@ -256,13 +255,13 @@ class SMBFileSystem(AbstractFileSystem):
|
|
|
256
255
|
|
|
257
256
|
def _as_unc_path(host, path):
|
|
258
257
|
rpath = path.replace("/", "\\")
|
|
259
|
-
unc = "\\\\{}{}"
|
|
258
|
+
unc = f"\\\\{host}{rpath}"
|
|
260
259
|
return unc
|
|
261
260
|
|
|
262
261
|
|
|
263
262
|
def _as_temp_path(host, path, temppath):
|
|
264
263
|
share = path.split("/")[1]
|
|
265
|
-
temp_file = "/{}{}/{
|
|
264
|
+
temp_file = f"/{share}{temppath}/{uuid.uuid4()}"
|
|
266
265
|
unc = _as_unc_path(host, temp_file)
|
|
267
266
|
return unc
|
|
268
267
|
|
|
@@ -21,7 +21,7 @@ class WebHDFS(AbstractFileSystem):
|
|
|
21
21
|
"""
|
|
22
22
|
Interface to HDFS over HTTP using the WebHDFS API. Supports also HttpFS gateways.
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
Four auth mechanisms are supported:
|
|
25
25
|
|
|
26
26
|
insecure: no auth is done, and the user is assumed to be whoever they
|
|
27
27
|
say they are (parameter ``user``), or a predefined value such as
|
|
@@ -34,6 +34,8 @@ class WebHDFS(AbstractFileSystem):
|
|
|
34
34
|
service. Indeed, this client can also generate such tokens when
|
|
35
35
|
not insecure. Note that tokens expire, but can be renewed (by a
|
|
36
36
|
previously specified user) and may allow for proxying.
|
|
37
|
+
basic-auth: used when both parameter ``user`` and parameter ``password``
|
|
38
|
+
are provided.
|
|
37
39
|
|
|
38
40
|
"""
|
|
39
41
|
|
|
@@ -47,6 +49,7 @@ class WebHDFS(AbstractFileSystem):
|
|
|
47
49
|
kerberos=False,
|
|
48
50
|
token=None,
|
|
49
51
|
user=None,
|
|
52
|
+
password=None,
|
|
50
53
|
proxy_to=None,
|
|
51
54
|
kerb_kwargs=None,
|
|
52
55
|
data_proxy=None,
|
|
@@ -68,6 +71,9 @@ class WebHDFS(AbstractFileSystem):
|
|
|
68
71
|
given
|
|
69
72
|
user: str or None
|
|
70
73
|
If given, assert the user name to connect with
|
|
74
|
+
password: str or None
|
|
75
|
+
If given, assert the password to use for basic auth. If password
|
|
76
|
+
is provided, user must be provided also
|
|
71
77
|
proxy_to: str or None
|
|
72
78
|
If given, the user has the authority to proxy, and this value is
|
|
73
79
|
the user in whose name actions are taken
|
|
@@ -89,9 +95,7 @@ class WebHDFS(AbstractFileSystem):
|
|
|
89
95
|
if self._cached:
|
|
90
96
|
return
|
|
91
97
|
super().__init__(**kwargs)
|
|
92
|
-
self.url = "{
|
|
93
|
-
protocol="https" if use_https else "http", host=host, port=port
|
|
94
|
-
)
|
|
98
|
+
self.url = f"{'https' if use_https else 'http'}://{host}:{port}/webhdfs/v1"
|
|
95
99
|
self.kerb = kerberos
|
|
96
100
|
self.kerb_kwargs = kerb_kwargs or {}
|
|
97
101
|
self.pars = {}
|
|
@@ -104,8 +108,19 @@ class WebHDFS(AbstractFileSystem):
|
|
|
104
108
|
" token"
|
|
105
109
|
)
|
|
106
110
|
self.pars["delegation"] = token
|
|
107
|
-
|
|
108
|
-
|
|
111
|
+
self.user = user
|
|
112
|
+
self.password = password
|
|
113
|
+
|
|
114
|
+
if password is not None:
|
|
115
|
+
if user is None:
|
|
116
|
+
raise ValueError(
|
|
117
|
+
"If passing a password, the user must also be"
|
|
118
|
+
"set in order to set up the basic-auth"
|
|
119
|
+
)
|
|
120
|
+
else:
|
|
121
|
+
if user is not None:
|
|
122
|
+
self.pars["user.name"] = user
|
|
123
|
+
|
|
109
124
|
if proxy_to is not None:
|
|
110
125
|
self.pars["doas"] = proxy_to
|
|
111
126
|
if kerberos and user is not None:
|
|
@@ -115,7 +130,7 @@ class WebHDFS(AbstractFileSystem):
|
|
|
115
130
|
)
|
|
116
131
|
self._connect()
|
|
117
132
|
|
|
118
|
-
self._fsid = "webhdfs_
|
|
133
|
+
self._fsid = f"webhdfs_{tokenize(host, port)}"
|
|
119
134
|
|
|
120
135
|
@property
|
|
121
136
|
def fsid(self):
|
|
@@ -128,8 +143,13 @@ class WebHDFS(AbstractFileSystem):
|
|
|
128
143
|
|
|
129
144
|
self.session.auth = HTTPKerberosAuth(**self.kerb_kwargs)
|
|
130
145
|
|
|
146
|
+
if self.user is not None and self.password is not None:
|
|
147
|
+
from requests.auth import HTTPBasicAuth
|
|
148
|
+
|
|
149
|
+
self.session.auth = HTTPBasicAuth(self.user, self.password)
|
|
150
|
+
|
|
131
151
|
def _call(self, op, method="get", path=None, data=None, redirect=True, **kwargs):
|
|
132
|
-
url = self.url + quote(path or "")
|
|
152
|
+
url = self._apply_proxy(self.url + quote(path or "", safe="/="))
|
|
133
153
|
args = kwargs.copy()
|
|
134
154
|
args.update(self.pars)
|
|
135
155
|
args["op"] = op.upper()
|
fsspec/implementations/zip.py
CHANGED
|
@@ -113,10 +113,10 @@ class ZipFileSystem(AbstractArchiveFileSystem):
|
|
|
113
113
|
path = self._strip_protocol(path)
|
|
114
114
|
if "r" in mode and self.mode in set("wa"):
|
|
115
115
|
if self.exists(path):
|
|
116
|
-
raise
|
|
116
|
+
raise OSError("ZipFS can only be open for reading or writing, not both")
|
|
117
117
|
raise FileNotFoundError(path)
|
|
118
118
|
if "r" in self.mode and "w" in mode:
|
|
119
|
-
raise
|
|
119
|
+
raise OSError("ZipFS can only be open for reading or writing, not both")
|
|
120
120
|
out = self.zip.open(path, mode.strip("b"))
|
|
121
121
|
if "r" in mode:
|
|
122
122
|
info = self.info(path)
|
fsspec/mapping.py
CHANGED
|
@@ -54,8 +54,8 @@ class FSMap(MutableMapping):
|
|
|
54
54
|
if check:
|
|
55
55
|
if not self.fs.exists(root):
|
|
56
56
|
raise ValueError(
|
|
57
|
-
"Path
|
|
58
|
-
" with the ``create=True`` keyword"
|
|
57
|
+
f"Path {root} does not exist. Create "
|
|
58
|
+
f" with the ``create=True`` keyword"
|
|
59
59
|
)
|
|
60
60
|
self.fs.touch(root + "/a")
|
|
61
61
|
self.fs.rm(root + "/a")
|
fsspec/registry.py
CHANGED
|
@@ -38,29 +38,31 @@ def register_implementation(name, cls, clobber=False, errtxt=None):
|
|
|
38
38
|
if name in known_implementations and clobber is False:
|
|
39
39
|
if cls != known_implementations[name]["class"]:
|
|
40
40
|
raise ValueError(
|
|
41
|
-
"Name (
|
|
42
|
-
"is False"
|
|
41
|
+
f"Name ({name}) already in the known_implementations and clobber "
|
|
42
|
+
f"is False"
|
|
43
43
|
)
|
|
44
44
|
else:
|
|
45
45
|
known_implementations[name] = {
|
|
46
46
|
"class": cls,
|
|
47
|
-
"err": errtxt or "
|
|
47
|
+
"err": errtxt or f"{cls} import failed for protocol {name}",
|
|
48
48
|
}
|
|
49
49
|
|
|
50
50
|
else:
|
|
51
51
|
if name in registry and clobber is False:
|
|
52
52
|
if _registry[name] is not cls:
|
|
53
53
|
raise ValueError(
|
|
54
|
-
"Name (
|
|
54
|
+
f"Name ({name}) already in the registry and clobber is False"
|
|
55
55
|
)
|
|
56
56
|
else:
|
|
57
57
|
_registry[name] = cls
|
|
58
58
|
|
|
59
59
|
|
|
60
|
-
# protocols mapped to the class which implements them. This dict can
|
|
60
|
+
# protocols mapped to the class which implements them. This dict can be
|
|
61
61
|
# updated with register_implementation
|
|
62
62
|
known_implementations = {
|
|
63
|
+
"data": {"class": "fsspec.implementations.data.DataFileSystem"},
|
|
63
64
|
"file": {"class": "fsspec.implementations.local.LocalFileSystem"},
|
|
65
|
+
"local": {"class": "fsspec.implementations.local.LocalFileSystem"},
|
|
64
66
|
"memory": {"class": "fsspec.implementations.memory.MemoryFileSystem"},
|
|
65
67
|
"dropbox": {
|
|
66
68
|
"class": "dropboxdrivefs.DropboxDriveFileSystem",
|
|
@@ -228,7 +230,7 @@ def get_filesystem_class(protocol):
|
|
|
228
230
|
|
|
229
231
|
if protocol not in registry:
|
|
230
232
|
if protocol not in known_implementations:
|
|
231
|
-
raise ValueError("Protocol not known:
|
|
233
|
+
raise ValueError(f"Protocol not known: {protocol}")
|
|
232
234
|
bit = known_implementations[protocol]
|
|
233
235
|
try:
|
|
234
236
|
register_implementation(protocol, _import_class(bit["class"]))
|