fsspec-2024.5.0-py3-none-any.whl → fsspec-2024.6.1-py3-none-any.whl
This diff shows the contents of publicly available package versions as they appear in their respective public registries; it is provided for informational purposes only and reflects the changes between the two releases.
- fsspec/_version.py +2 -2
- fsspec/caching.py +3 -2
- fsspec/compression.py +1 -1
- fsspec/generic.py +3 -0
- fsspec/implementations/cached.py +6 -16
- fsspec/implementations/dirfs.py +2 -0
- fsspec/implementations/github.py +12 -0
- fsspec/implementations/http.py +2 -1
- fsspec/implementations/reference.py +9 -0
- fsspec/implementations/smb.py +10 -0
- fsspec/json.py +121 -0
- fsspec/registry.py +24 -18
- fsspec/spec.py +119 -33
- fsspec/utils.py +1 -1
- {fsspec-2024.5.0.dist-info → fsspec-2024.6.1.dist-info}/METADATA +10 -5
- fsspec-2024.6.1.dist-info/RECORD +55 -0
- {fsspec-2024.5.0.dist-info → fsspec-2024.6.1.dist-info}/WHEEL +1 -1
- fsspec/implementations/tests/__init__.py +0 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_file_listing.yaml +0 -112
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_mkdir.yaml +0 -582
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_pyarrow_non_partitioned.yaml +0 -873
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range.yaml +0 -458
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range_chunked.yaml +0 -1355
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_and_read.yaml +0 -795
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_pyarrow_non_partitioned.yaml +0 -613
- fsspec/implementations/tests/conftest.py +0 -39
- fsspec/implementations/tests/local/__init__.py +0 -0
- fsspec/implementations/tests/local/local_fixtures.py +0 -18
- fsspec/implementations/tests/local/local_test.py +0 -14
- fsspec/implementations/tests/memory/__init__.py +0 -0
- fsspec/implementations/tests/memory/memory_fixtures.py +0 -27
- fsspec/implementations/tests/memory/memory_test.py +0 -14
- fsspec/implementations/tests/out.zip +0 -0
- fsspec/implementations/tests/test_archive.py +0 -382
- fsspec/implementations/tests/test_arrow.py +0 -259
- fsspec/implementations/tests/test_cached.py +0 -1306
- fsspec/implementations/tests/test_common.py +0 -35
- fsspec/implementations/tests/test_dask.py +0 -29
- fsspec/implementations/tests/test_data.py +0 -20
- fsspec/implementations/tests/test_dbfs.py +0 -268
- fsspec/implementations/tests/test_dirfs.py +0 -588
- fsspec/implementations/tests/test_ftp.py +0 -178
- fsspec/implementations/tests/test_git.py +0 -76
- fsspec/implementations/tests/test_http.py +0 -577
- fsspec/implementations/tests/test_jupyter.py +0 -57
- fsspec/implementations/tests/test_libarchive.py +0 -33
- fsspec/implementations/tests/test_local.py +0 -1285
- fsspec/implementations/tests/test_memory.py +0 -382
- fsspec/implementations/tests/test_reference.py +0 -720
- fsspec/implementations/tests/test_sftp.py +0 -233
- fsspec/implementations/tests/test_smb.py +0 -139
- fsspec/implementations/tests/test_tar.py +0 -243
- fsspec/implementations/tests/test_webhdfs.py +0 -197
- fsspec/implementations/tests/test_zip.py +0 -134
- fsspec/tests/__init__.py +0 -0
- fsspec/tests/conftest.py +0 -188
- fsspec/tests/data/listing.html +0 -1
- fsspec/tests/test_api.py +0 -498
- fsspec/tests/test_async.py +0 -230
- fsspec/tests/test_caches.py +0 -255
- fsspec/tests/test_callbacks.py +0 -89
- fsspec/tests/test_compression.py +0 -164
- fsspec/tests/test_config.py +0 -129
- fsspec/tests/test_core.py +0 -466
- fsspec/tests/test_downstream.py +0 -40
- fsspec/tests/test_file.py +0 -200
- fsspec/tests/test_fuse.py +0 -147
- fsspec/tests/test_generic.py +0 -90
- fsspec/tests/test_gui.py +0 -23
- fsspec/tests/test_mapping.py +0 -228
- fsspec/tests/test_parquet.py +0 -140
- fsspec/tests/test_registry.py +0 -134
- fsspec/tests/test_spec.py +0 -1167
- fsspec/tests/test_utils.py +0 -478
- fsspec-2024.5.0.dist-info/RECORD +0 -111
- {fsspec-2024.5.0.dist-info → fsspec-2024.6.1.dist-info}/licenses/LICENSE +0 -0
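Of the files listed above, fsspec/json.py is new in 2024.6.1; its contents are not part of the hunks shown below. As a hedged illustration only: fsspec filesystem instances can already be round-tripped through JSON via the long-standing to_json/from_json helpers on AbstractFileSystem, and the new module presumably relates to that machinery (an assumption, not something this diff shows). A minimal sketch using only those public calls:

# Sketch only; relies on the public to_json/from_json helpers, not on the internals of fsspec/json.py.
import fsspec
from fsspec import AbstractFileSystem

fs = fsspec.filesystem("memory")            # any registered protocol works here
blob = fs.to_json()                         # serialise the instance to a JSON string
fs2 = AbstractFileSystem.from_json(blob)    # rebuild an equivalent instance from that string
assert type(fs2) is type(fs)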
fsspec/implementations/tests/memory/memory_fixtures.py
@@ -1,27 +0,0 @@
-import pytest
-
-from fsspec import filesystem
-from fsspec.tests.abstract import AbstractFixtures
-
-
-class MemoryFixtures(AbstractFixtures):
-    @pytest.fixture(scope="class")
-    def fs(self):
-        m = filesystem("memory")
-        m.store.clear()
-        m.pseudo_dirs.clear()
-        m.pseudo_dirs.append("")
-        try:
-            yield m
-        finally:
-            m.store.clear()
-            m.pseudo_dirs.clear()
-            m.pseudo_dirs.append("")
-
-    @pytest.fixture
-    def fs_join(self):
-        return lambda *args: "/".join(args)
-
-    @pytest.fixture
-    def fs_path(self):
-        return ""
fsspec/implementations/tests/memory/memory_test.py
@@ -1,14 +0,0 @@
-import fsspec.tests.abstract as abstract
-from fsspec.implementations.tests.memory.memory_fixtures import MemoryFixtures
-
-
-class TestMemoryCopy(abstract.AbstractCopyTests, MemoryFixtures):
-    pass
-
-
-class TestMemoryGet(abstract.AbstractGetTests, MemoryFixtures):
-    pass
-
-
-class TestMemoryPut(abstract.AbstractPutTests, MemoryFixtures):
-    pass
fsspec/implementations/tests/out.zip: binary file (no text diff shown)
fsspec/implementations/tests/test_archive.py
@@ -1,382 +0,0 @@
-import bz2
-import gzip
-import lzma
-import os
-import pickle
-import tarfile
-import tempfile
-import zipfile
-from contextlib import contextmanager
-from io import BytesIO
-
-import pytest
-
-import fsspec
-
-# The blueprint to create synthesized archive files from.
-archive_data = {"a": b"", "b": b"hello", "deeply/nested/path": b"stuff"}
-
-
-@contextmanager
-def tempzip(data=None):
-    """
-    Provide test cases with temporary synthesized Zip archives.
-    """
-    data = data or {}
-    f = tempfile.mkstemp(suffix=".zip")[1]
-    with zipfile.ZipFile(f, mode="w") as z:
-        for k, v in data.items():
-            z.writestr(k, v)
-    try:
-        yield f
-    finally:
-        try:
-            os.remove(f)
-        except OSError:
-            pass
-
-
-@contextmanager
-def temparchive(data=None):
-    """
-    Provide test cases with temporary synthesized 7-Zip archives.
-    """
-    data = data or {}
-    libarchive = pytest.importorskip("libarchive")
-    f = tempfile.mkstemp(suffix=".7z")[1]
-    with libarchive.file_writer(f, "7zip") as archive:
-        for k, v in data.items():
-            archive.add_file_from_memory(entry_path=k, entry_size=len(v), entry_data=v)
-    try:
-        yield f
-    finally:
-        try:
-            os.remove(f)
-        except OSError:
-            pass
-
-
-@contextmanager
-def temptar(data=None, mode="w", suffix=".tar"):
-    """
-    Provide test cases with temporary synthesized .tar archives.
-    """
-    data = data or {}
-    fn = tempfile.mkstemp(suffix=suffix)[1]
-    with tarfile.TarFile.open(fn, mode=mode) as t:
-        touched = {}
-        for name, data in data.items():
-            # Create directory hierarchy.
-            # https://bugs.python.org/issue22208#msg225558
-            if "/" in name and name not in touched:
-                parts = os.path.dirname(name).split("/")
-                for index in range(1, len(parts) + 1):
-                    info = tarfile.TarInfo("/".join(parts[:index]))
-                    info.type = tarfile.DIRTYPE
-                    t.addfile(info)
-                touched[name] = True
-
-            # Add file content.
-            info = tarfile.TarInfo(name=name)
-            info.size = len(data)
-            t.addfile(info, BytesIO(data))
-
-    try:
-        yield fn
-    finally:
-        try:
-            os.remove(fn)
-        except OSError:
-            pass
-
-
-@contextmanager
-def temptargz(data=None, mode="w", suffix=".tar.gz"):
-    """
-    Provide test cases with temporary synthesized .tar.gz archives.
-    """
-
-    with temptar(data=data, mode=mode) as tarname:
-        fn = tempfile.mkstemp(suffix=suffix)[1]
-        with open(tarname, "rb") as tar:
-            cf = gzip.GzipFile(filename=fn, mode=mode)
-            cf.write(tar.read())
-            cf.close()
-
-        try:
-            yield fn
-        finally:
-            try:
-                os.remove(fn)
-            except OSError:
-                pass
-
-
-@contextmanager
-def temptarbz2(data=None, mode="w", suffix=".tar.bz2"):
-    """
-    Provide test cases with temporary synthesized .tar.bz2 archives.
-    """
-
-    with temptar(data=data, mode=mode) as tarname:
-        fn = tempfile.mkstemp(suffix=suffix)[1]
-        with open(tarname, "rb") as tar:
-            cf = bz2.BZ2File(filename=fn, mode=mode)
-            cf.write(tar.read())
-            cf.close()
-
-        try:
-            yield fn
-        finally:
-            try:
-                os.remove(fn)
-            except OSError:
-                pass
-
-
-@contextmanager
-def temptarxz(data=None, mode="w", suffix=".tar.xz"):
-    """
-    Provide test cases with temporary synthesized .tar.xz archives.
-    """
-
-    with temptar(data=data, mode=mode) as tarname:
-        fn = tempfile.mkstemp(suffix=suffix)[1]
-        with open(tarname, "rb") as tar:
-            cf = lzma.open(filename=fn, mode=mode, format=lzma.FORMAT_XZ)
-            cf.write(tar.read())
-            cf.close()
-
-        try:
-            yield fn
-        finally:
-            try:
-                os.remove(fn)
-            except OSError:
-                pass
-
-
-class ArchiveTestScenario:
-    """
-    Describe a test scenario for any type of archive.
-    """
-
-    def __init__(self, protocol=None, provider=None, variant=None):
-        # The filesystem protocol identifier. Any of "zip", "tar" or "libarchive".
-        self.protocol = protocol
-        # A contextmanager function to provide temporary synthesized archives.
-        self.provider = provider
-        # The filesystem protocol variant identifier. Any of "gz", "bz2" or "xz".
-        self.variant = variant
-
-
-def pytest_generate_tests(metafunc):
-    """
-    Generate test scenario parametrization arguments with appropriate labels (idlist).
-
-    On the one hand, this yields an appropriate output like::
-
-        fsspec/implementations/tests/test_archive.py::TestArchive::test_empty[zip] PASSED  # noqa
-
-    On the other hand, it will support perfect test discovery, like::
-
-        pytest fsspec -vvv -k "zip or tar or libarchive"
-
-    https://docs.pytest.org/en/latest/example/parametrize.html#a-quick-port-of-testscenarios
-    """
-    idlist = []
-    argnames = ["scenario"]
-    argvalues = []
-    for scenario in metafunc.cls.scenarios:
-        scenario: ArchiveTestScenario = scenario
-        label = scenario.protocol
-        if scenario.variant:
-            label += "-" + scenario.variant
-        idlist.append(label)
-        argvalues.append([scenario])
-    metafunc.parametrize(argnames, argvalues, ids=idlist, scope="class")
-
-
-# Define test scenarios.
-scenario_zip = ArchiveTestScenario(protocol="zip", provider=tempzip)
-scenario_tar = ArchiveTestScenario(protocol="tar", provider=temptar)
-scenario_targz = ArchiveTestScenario(protocol="tar", provider=temptargz, variant="gz")
-scenario_tarbz2 = ArchiveTestScenario(
-    protocol="tar", provider=temptarbz2, variant="bz2"
-)
-scenario_tarxz = ArchiveTestScenario(protocol="tar", provider=temptarxz, variant="xz")
-scenario_libarchive = ArchiveTestScenario(protocol="libarchive", provider=temparchive)
-
-
-class TestAnyArchive:
-    """
-    Validate that all filesystem adapter implementations for archive files
-    will adhere to the same specification.
-    """
-
-    scenarios = [
-        scenario_zip,
-        scenario_tar,
-        scenario_targz,
-        scenario_tarbz2,
-        scenario_tarxz,
-        scenario_libarchive,
-    ]
-
-    def test_repr(self, scenario: ArchiveTestScenario):
-        with scenario.provider() as archive:
-            fs = fsspec.filesystem(scenario.protocol, fo=archive)
-            assert repr(fs).startswith("<Archive-like object")
-
-    def test_empty(self, scenario: ArchiveTestScenario):
-        with scenario.provider() as archive:
-            fs = fsspec.filesystem(scenario.protocol, fo=archive)
-            assert fs.find("") == []
-            assert fs.find("", withdirs=True) == []
-            with pytest.raises(FileNotFoundError):
-                fs.info("")
-            assert fs.ls("") == []
-
-    def test_glob(self, scenario: ArchiveTestScenario):
-        with scenario.provider(archive_data) as archive:
-            fs = fsspec.filesystem(scenario.protocol, fo=archive)
-            assert fs.glob("*/*/*th") == ["deeply/nested/path"]
-
-    def test_mapping(self, scenario: ArchiveTestScenario):
-        with scenario.provider(archive_data) as archive:
-            fs = fsspec.filesystem(scenario.protocol, fo=archive)
-            m = fs.get_mapper()
-            assert list(m) == ["a", "b", "deeply/nested/path"]
-            assert m["b"] == archive_data["b"]
-
-    def test_pickle(self, scenario: ArchiveTestScenario):
-        with scenario.provider(archive_data) as archive:
-            fs = fsspec.filesystem(scenario.protocol, fo=archive)
-            fs2 = pickle.loads(pickle.dumps(fs))
-            assert fs2.cat("b") == b"hello"
-
-    def test_all_dirnames(self, scenario: ArchiveTestScenario):
-        with scenario.provider(archive_data) as archive:
-            fs = fsspec.filesystem(scenario.protocol, fo=archive)
-
-            # fx are files, dx are a directories
-            assert fs._all_dirnames([]) == set()
-            assert fs._all_dirnames(["f1"]) == set()
-            assert fs._all_dirnames(["f1", "f2"]) == set()
-            assert fs._all_dirnames(["f1", "f2", "d1/f1"]) == {"d1"}
-            assert fs._all_dirnames(["f1", "d1/f1", "d1/f2"]) == {"d1"}
-            assert fs._all_dirnames(["f1", "d1/f1", "d2/f1"]) == {"d1", "d2"}
-            assert fs._all_dirnames(["d1/d1/d1/f1"]) == {"d1", "d1/d1", "d1/d1/d1"}
-
-    def test_ls(self, scenario: ArchiveTestScenario):
-        with scenario.provider(archive_data) as archive:
-            fs = fsspec.filesystem(scenario.protocol, fo=archive)
-
-            assert fs.ls("", detail=False) == ["a", "b", "deeply"]
-            assert fs.ls("/") == fs.ls("")
-
-            assert fs.ls("deeply", detail=False) == ["deeply/nested"]
-            assert fs.ls("deeply/") == fs.ls("deeply")
-
-            assert fs.ls("deeply/nested", detail=False) == ["deeply/nested/path"]
-            assert fs.ls("deeply/nested/") == fs.ls("deeply/nested")
-
-    def test_find(self, scenario: ArchiveTestScenario):
-        with scenario.provider(archive_data) as archive:
-            fs = fsspec.filesystem(scenario.protocol, fo=archive)
-
-            assert fs.find("") == ["a", "b", "deeply/nested/path"]
-            assert fs.find("", withdirs=True) == [
-                "a",
-                "b",
-                "deeply",
-                "deeply/nested",
-                "deeply/nested/path",
-            ]
-
-            assert fs.find("deeply") == ["deeply/nested/path"]
-            assert fs.find("deeply/") == fs.find("deeply")
-
-    @pytest.mark.parametrize("topdown", [True, False])
-    @pytest.mark.parametrize("prune_nested", [True, False])
-    def test_walk(self, scenario: ArchiveTestScenario, topdown, prune_nested):
-        with scenario.provider(archive_data) as archive:
-            fs = fsspec.filesystem(scenario.protocol, fo=archive)
-            expected = [
-                # (dirname, list of subdirs, list of files)
-                ("", ["deeply"], ["a", "b"]),
-                ("deeply", ["nested"], []),
-            ]
-            if not topdown or not prune_nested:
-                expected.append(("deeply/nested", [], ["path"]))
-            if not topdown:
-                expected.reverse()
-
-            result = []
-            for path, dirs, files in fs.walk("", topdown=topdown):
-                result.append((path, dirs.copy(), files))
-                # Bypass the "nested" dir
-                if prune_nested and "nested" in dirs:
-                    dirs.remove("nested")
-
-            # prior py3.10 zip() does not support strict=True, we need
-            # a manual len check here
-            assert len(result) == len(expected)
-            for lhs, rhs in zip(result, expected):
-                assert lhs[0] == rhs[0]
-                assert sorted(lhs[1]) == sorted(rhs[1])
-                assert sorted(lhs[2]) == sorted(rhs[2])
-
-    def test_info(self, scenario: ArchiveTestScenario):
-        # https://github.com/Suor/funcy/blob/1.15/funcy/colls.py#L243-L245
-        def project(mapping, keys):
-            """Leaves only given keys in mapping."""
-            return {k: mapping[k] for k in keys if k in mapping}
-
-        with scenario.provider(archive_data) as archive:
-            fs = fsspec.filesystem(scenario.protocol, fo=archive)
-
-            with pytest.raises(FileNotFoundError):
-                fs.info("i-do-not-exist")
-
-            # Iterate over all directories.
-            for d in fs._all_dirnames(archive_data.keys()):
-                lhs = project(fs.info(d), ["name", "size", "type"])
-                expected = {"name": f"{d}", "size": 0, "type": "directory"}
-                assert lhs == expected
-
-            # Iterate over all files.
-            for f, v in archive_data.items():
-                lhs = fs.info(f)
-                assert lhs["name"] == f
-                assert lhs["size"] == len(v)
-                assert lhs["type"] == "file"
-
-    @pytest.mark.parametrize("scale", [128, 512, 4096])
-    def test_isdir_isfile(self, scenario: ArchiveTestScenario, scale: int):
-        def make_nested_dir(i):
-            x = f"{i}"
-            table = x.maketrans("0123456789", "ABCDEFGHIJ")
-            return "/".join(x.translate(table))
-
-        scaled_data = {f"{make_nested_dir(i)}/{i}": b"" for i in range(1, scale + 1)}
-        with scenario.provider(scaled_data) as archive:
-            fs = fsspec.filesystem(scenario.protocol, fo=archive)
-
-            lhs_dirs, lhs_files = (
-                fs._all_dirnames(scaled_data.keys()),
-                scaled_data.keys(),
-            )
-
-            # Warm-up the Cache, this is done in both cases anyways...
-            fs._get_dirs()
-
-            entries = lhs_files | lhs_dirs
-
-            assert lhs_dirs == {e for e in entries if fs.isdir(e)}
-            assert lhs_files == {e for e in entries if fs.isfile(e)}
-
-    def test_read_empty_file(self, scenario: ArchiveTestScenario):
-        with scenario.provider(archive_data) as archive:
-            fs = fsspec.filesystem(scenario.protocol, fo=archive)
-            assert fs.open("a").read() == b""
fsspec/implementations/tests/test_arrow.py
@@ -1,259 +0,0 @@
-import secrets
-
-import pytest
-
-pyarrow_fs = pytest.importorskip("pyarrow.fs")
-FileSystem = pyarrow_fs.FileSystem
-
-from fsspec.implementations.arrow import ArrowFSWrapper, HadoopFileSystem  # noqa
-
-
-@pytest.fixture(scope="function")
-def fs():
-    fs, _ = FileSystem.from_uri("mock://")
-    return ArrowFSWrapper(fs)
-
-
-@pytest.fixture(scope="function", params=[False, True])
-def remote_dir(fs, request):
-    directory = secrets.token_hex(16)
-    fs.makedirs(directory)
-    yield ("hdfs://" if request.param else "/") + directory
-    fs.rm(directory, recursive=True)
-
-
-def test_protocol():
-    fs, _ = FileSystem.from_uri("mock://")
-    fss = ArrowFSWrapper(fs)
-    assert fss.protocol == "mock"
-
-
-def strip_keys(original_entry):
-    entry = original_entry.copy()
-    entry.pop("mtime")
-    return entry
-
-
-def test_strip(fs):
-    assert fs._strip_protocol("/a/file") == "/a/file"
-    assert fs._strip_protocol("hdfs:///a/file") == "/a/file"
-    assert fs._strip_protocol("hdfs://1.1.1.1/a/file") == "/a/file"
-    assert fs._strip_protocol("hdfs://1.1.1.1:8888/a/file") == "/a/file"
-
-
-def test_info(fs, remote_dir):
-    fs.touch(remote_dir + "/a.txt")
-    remote_dir_strip_protocol = fs._strip_protocol(remote_dir)
-    details = fs.info(remote_dir + "/a.txt")
-    assert details["type"] == "file"
-    assert details["name"] == remote_dir_strip_protocol + "/a.txt"
-    assert details["size"] == 0
-
-    fs.mkdir(remote_dir + "/dir")
-    details = fs.info(remote_dir + "/dir")
-    assert details["type"] == "directory"
-    assert details["name"] == remote_dir_strip_protocol + "/dir"
-
-    details = fs.info(remote_dir + "/dir/")
-    assert details["name"] == remote_dir_strip_protocol + "/dir/"
-
-
-def test_move(fs, remote_dir):
-    fs.touch(remote_dir + "/a.txt")
-    initial_info = fs.info(remote_dir + "/a.txt")
-
-    fs.move(remote_dir + "/a.txt", remote_dir + "/b.txt")
-    secondary_info = fs.info(remote_dir + "/b.txt")
-
-    assert not fs.exists(remote_dir + "/a.txt")
-    assert fs.exists(remote_dir + "/b.txt")
-
-    initial_info.pop("name")
-    secondary_info.pop("name")
-    assert initial_info == secondary_info
-
-
-def test_move_recursive(fs, remote_dir):
-    src = remote_dir + "/src"
-    dest = remote_dir + "/dest"
-
-    assert fs.isdir(src) is False
-    fs.mkdir(src)
-    assert fs.isdir(src)
-
-    fs.touch(src + "/a.txt")
-    fs.mkdir(src + "/b")
-    fs.touch(src + "/b/c.txt")
-    fs.move(src, dest, recursive=True)
-
-    assert fs.isdir(src) is False
-    assert not fs.exists(src)
-
-    assert fs.isdir(dest)
-    assert fs.exists(dest)
-    assert fs.cat(dest + "/b/c.txt") == fs.cat(dest + "/a.txt") == b""
-
-
-def test_copy(fs, remote_dir):
-    fs.touch(remote_dir + "/a.txt")
-    initial_info = fs.info(remote_dir + "/a.txt")
-
-    fs.copy(remote_dir + "/a.txt", remote_dir + "/b.txt")
-    secondary_info = fs.info(remote_dir + "/b.txt")
-
-    assert fs.exists(remote_dir + "/a.txt")
-    assert fs.exists(remote_dir + "/b.txt")
-
-    initial_info.pop("name")
-    secondary_info.pop("name")
-    assert strip_keys(initial_info) == strip_keys(secondary_info)
-
-
-def test_rm(fs, remote_dir):
-    fs.touch(remote_dir + "/a.txt")
-    fs.rm(remote_dir + "/a.txt", recursive=True)
-    assert not fs.exists(remote_dir + "/a.txt")
-
-    fs.mkdir(remote_dir + "/dir")
-    fs.rm(remote_dir + "/dir", recursive=True)
-    assert not fs.exists(remote_dir + "/dir")
-
-    fs.mkdir(remote_dir + "/dir")
-    fs.touch(remote_dir + "/dir/a")
-    fs.touch(remote_dir + "/dir/b")
-    fs.mkdir(remote_dir + "/dir/c/")
-    fs.touch(remote_dir + "/dir/c/a")
-    fs.rm(remote_dir + "/dir", recursive=True)
-    assert not fs.exists(remote_dir + "/dir")
-
-
-def test_ls(fs, remote_dir):
-    if remote_dir != "/":
-        remote_dir = remote_dir + "/"
-    remote_dir_strip_protocol = fs._strip_protocol(remote_dir)
-    fs.mkdir(remote_dir + "dir/")
-    files = set()
-    for no in range(8):
-        file = remote_dir + f"dir/test_{no}"
-        # we also want to make sure `fs.touch` works with protocol
-        fs.touch(file)
-        files.add(remote_dir_strip_protocol + f"dir/test_{no}")
-
-    assert set(fs.ls(remote_dir + "dir/")) == files
-
-    dirs = fs.ls(remote_dir + "dir/", detail=True)
-    expected = [fs.info(file) for file in files]
-
-    by_name = lambda details: details["name"]
-    dirs.sort(key=by_name)
-    expected.sort(key=by_name)
-
-    assert dirs == expected
-
-
-def test_mkdir(fs, remote_dir):
-    if remote_dir != "/":
-        remote_dir = remote_dir + "/"
-    fs.mkdir(remote_dir + "dir/")
-    assert fs.isdir(remote_dir + "dir/")
-    assert len(fs.ls(remote_dir + "dir/")) == 0
-
-    fs.mkdir(remote_dir + "dir/sub", create_parents=False)
-    assert fs.isdir(remote_dir + "dir/sub")
-
-
-def test_makedirs(fs, remote_dir):
-    fs.makedirs(remote_dir + "dir/a/b/c/")
-    assert fs.isdir(remote_dir + "dir/a/b/c/")
-    assert fs.isdir(remote_dir + "dir/a/b/")
-    assert fs.isdir(remote_dir + "dir/a/")
-
-    fs.makedirs(remote_dir + "dir/a/b/c/", exist_ok=True)
-
-
-def test_exceptions(fs, remote_dir):
-    with pytest.raises(FileNotFoundError):
-        with fs.open(remote_dir + "/a.txt"):
-            ...
-
-    with pytest.raises(FileNotFoundError):
-        fs.copy(remote_dir + "/u.txt", remote_dir + "/y.txt")
-
-
-def test_open_rw(fs, remote_dir):
-    data = b"dvc.org"
-
-    with fs.open(remote_dir + "/a.txt", "wb") as stream:
-        stream.write(data)
-
-    with fs.open(remote_dir + "/a.txt") as stream:
-        assert stream.read() == data
-
-
-def test_open_rw_flush(fs, remote_dir):
-    data = b"dvc.org"
-
-    with fs.open(remote_dir + "/b.txt", "wb") as stream:
-        for _ in range(200):
-            stream.write(data)
-            stream.write(data)
-            stream.flush()
-
-    with fs.open(remote_dir + "/b.txt", "rb") as stream:
-        assert stream.read() == data * 400
-
-
-def test_open_append(fs, remote_dir):
-    data = b"dvc.org"
-
-    with fs.open(remote_dir + "/a.txt", "wb") as stream:
-        stream.write(data)
-
-    with fs.open(remote_dir + "/a.txt", "ab") as stream:
-        stream.write(data)
-
-    with fs.open(remote_dir + "/a.txt") as stream:
-        assert stream.read() == 2 * data
-
-
-def test_open_seekable(fs, remote_dir):
-    data = b"dvc.org"
-
-    with fs.open(remote_dir + "/a.txt", "wb") as stream:
-        stream.write(data)
-
-    with fs.open(remote_dir + "/a.txt", "rb", seekable=True) as file:
-        file.seek(2)
-        assert file.read() == data[2:]
-
-
-def test_seekable(fs, remote_dir):
-    data = b"dvc.org"
-
-    with fs.open(remote_dir + "/a.txt", "wb") as stream:
-        stream.write(data)
-
-    for seekable in [True, False]:
-        with fs.open(remote_dir + "/a.txt", "rb", seekable=seekable) as file:
-            assert file.seekable() == seekable
-            assert file.read() == data
-
-    with fs.open(remote_dir + "/a.txt", "rb", seekable=False) as file:
-        with pytest.raises(OSError):
-            file.seek(5)
-
-
-def test_get_kwargs_from_urls_hadoop_fs():
-    kwargs = HadoopFileSystem._get_kwargs_from_urls(
-        "hdfs://user@localhost:8020/?replication=2"
-    )
-    assert kwargs["user"] == "user"
-    assert kwargs["host"] == "localhost"
-    assert kwargs["port"] == 8020
-    assert kwargs["replication"] == 2
-
-    kwargs = HadoopFileSystem._get_kwargs_from_urls("hdfs://user@localhost:8020/")
-    assert kwargs["user"] == "user"
-    assert kwargs["host"] == "localhost"
-    assert kwargs["port"] == 8020
-    assert "replication" not in kwargs