fsspec 2024.5.0__py3-none-any.whl → 2024.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. fsspec/_version.py +2 -2
  2. fsspec/caching.py +3 -2
  3. fsspec/compression.py +1 -1
  4. fsspec/generic.py +3 -0
  5. fsspec/implementations/cached.py +6 -16
  6. fsspec/implementations/dirfs.py +2 -0
  7. fsspec/implementations/github.py +12 -0
  8. fsspec/implementations/http.py +2 -1
  9. fsspec/implementations/reference.py +9 -0
  10. fsspec/implementations/smb.py +10 -0
  11. fsspec/json.py +121 -0
  12. fsspec/registry.py +24 -18
  13. fsspec/spec.py +119 -33
  14. fsspec/utils.py +1 -1
  15. {fsspec-2024.5.0.dist-info → fsspec-2024.6.1.dist-info}/METADATA +10 -5
  16. fsspec-2024.6.1.dist-info/RECORD +55 -0
  17. {fsspec-2024.5.0.dist-info → fsspec-2024.6.1.dist-info}/WHEEL +1 -1
  18. fsspec/implementations/tests/__init__.py +0 -0
  19. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_file_listing.yaml +0 -112
  20. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_mkdir.yaml +0 -582
  21. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_pyarrow_non_partitioned.yaml +0 -873
  22. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range.yaml +0 -458
  23. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range_chunked.yaml +0 -1355
  24. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_and_read.yaml +0 -795
  25. fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_pyarrow_non_partitioned.yaml +0 -613
  26. fsspec/implementations/tests/conftest.py +0 -39
  27. fsspec/implementations/tests/local/__init__.py +0 -0
  28. fsspec/implementations/tests/local/local_fixtures.py +0 -18
  29. fsspec/implementations/tests/local/local_test.py +0 -14
  30. fsspec/implementations/tests/memory/__init__.py +0 -0
  31. fsspec/implementations/tests/memory/memory_fixtures.py +0 -27
  32. fsspec/implementations/tests/memory/memory_test.py +0 -14
  33. fsspec/implementations/tests/out.zip +0 -0
  34. fsspec/implementations/tests/test_archive.py +0 -382
  35. fsspec/implementations/tests/test_arrow.py +0 -259
  36. fsspec/implementations/tests/test_cached.py +0 -1306
  37. fsspec/implementations/tests/test_common.py +0 -35
  38. fsspec/implementations/tests/test_dask.py +0 -29
  39. fsspec/implementations/tests/test_data.py +0 -20
  40. fsspec/implementations/tests/test_dbfs.py +0 -268
  41. fsspec/implementations/tests/test_dirfs.py +0 -588
  42. fsspec/implementations/tests/test_ftp.py +0 -178
  43. fsspec/implementations/tests/test_git.py +0 -76
  44. fsspec/implementations/tests/test_http.py +0 -577
  45. fsspec/implementations/tests/test_jupyter.py +0 -57
  46. fsspec/implementations/tests/test_libarchive.py +0 -33
  47. fsspec/implementations/tests/test_local.py +0 -1285
  48. fsspec/implementations/tests/test_memory.py +0 -382
  49. fsspec/implementations/tests/test_reference.py +0 -720
  50. fsspec/implementations/tests/test_sftp.py +0 -233
  51. fsspec/implementations/tests/test_smb.py +0 -139
  52. fsspec/implementations/tests/test_tar.py +0 -243
  53. fsspec/implementations/tests/test_webhdfs.py +0 -197
  54. fsspec/implementations/tests/test_zip.py +0 -134
  55. fsspec/tests/__init__.py +0 -0
  56. fsspec/tests/conftest.py +0 -188
  57. fsspec/tests/data/listing.html +0 -1
  58. fsspec/tests/test_api.py +0 -498
  59. fsspec/tests/test_async.py +0 -230
  60. fsspec/tests/test_caches.py +0 -255
  61. fsspec/tests/test_callbacks.py +0 -89
  62. fsspec/tests/test_compression.py +0 -164
  63. fsspec/tests/test_config.py +0 -129
  64. fsspec/tests/test_core.py +0 -466
  65. fsspec/tests/test_downstream.py +0 -40
  66. fsspec/tests/test_file.py +0 -200
  67. fsspec/tests/test_fuse.py +0 -147
  68. fsspec/tests/test_generic.py +0 -90
  69. fsspec/tests/test_gui.py +0 -23
  70. fsspec/tests/test_mapping.py +0 -228
  71. fsspec/tests/test_parquet.py +0 -140
  72. fsspec/tests/test_registry.py +0 -134
  73. fsspec/tests/test_spec.py +0 -1167
  74. fsspec/tests/test_utils.py +0 -478
  75. fsspec-2024.5.0.dist-info/RECORD +0 -111
  76. {fsspec-2024.5.0.dist-info → fsspec-2024.6.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,27 +0,0 @@
1
- import pytest
2
-
3
- from fsspec import filesystem
4
- from fsspec.tests.abstract import AbstractFixtures
5
-
6
-
7
- class MemoryFixtures(AbstractFixtures):
8
- @pytest.fixture(scope="class")
9
- def fs(self):
10
- m = filesystem("memory")
11
- m.store.clear()
12
- m.pseudo_dirs.clear()
13
- m.pseudo_dirs.append("")
14
- try:
15
- yield m
16
- finally:
17
- m.store.clear()
18
- m.pseudo_dirs.clear()
19
- m.pseudo_dirs.append("")
20
-
21
- @pytest.fixture
22
- def fs_join(self):
23
- return lambda *args: "/".join(args)
24
-
25
- @pytest.fixture
26
- def fs_path(self):
27
- return ""
@@ -1,14 +0,0 @@
1
- import fsspec.tests.abstract as abstract
2
- from fsspec.implementations.tests.memory.memory_fixtures import MemoryFixtures
3
-
4
-
5
- class TestMemoryCopy(abstract.AbstractCopyTests, MemoryFixtures):
6
- pass
7
-
8
-
9
- class TestMemoryGet(abstract.AbstractGetTests, MemoryFixtures):
10
- pass
11
-
12
-
13
- class TestMemoryPut(abstract.AbstractPutTests, MemoryFixtures):
14
- pass
Binary file
@@ -1,382 +0,0 @@
1
- import bz2
2
- import gzip
3
- import lzma
4
- import os
5
- import pickle
6
- import tarfile
7
- import tempfile
8
- import zipfile
9
- from contextlib import contextmanager
10
- from io import BytesIO
11
-
12
- import pytest
13
-
14
- import fsspec
15
-
16
- # The blueprint to create synthesized archive files from.
17
- archive_data = {"a": b"", "b": b"hello", "deeply/nested/path": b"stuff"}
18
-
19
-
20
- @contextmanager
21
- def tempzip(data=None):
22
- """
23
- Provide test cases with temporary synthesized Zip archives.
24
- """
25
- data = data or {}
26
- f = tempfile.mkstemp(suffix=".zip")[1]
27
- with zipfile.ZipFile(f, mode="w") as z:
28
- for k, v in data.items():
29
- z.writestr(k, v)
30
- try:
31
- yield f
32
- finally:
33
- try:
34
- os.remove(f)
35
- except OSError:
36
- pass
37
-
38
-
39
- @contextmanager
40
- def temparchive(data=None):
41
- """
42
- Provide test cases with temporary synthesized 7-Zip archives.
43
- """
44
- data = data or {}
45
- libarchive = pytest.importorskip("libarchive")
46
- f = tempfile.mkstemp(suffix=".7z")[1]
47
- with libarchive.file_writer(f, "7zip") as archive:
48
- for k, v in data.items():
49
- archive.add_file_from_memory(entry_path=k, entry_size=len(v), entry_data=v)
50
- try:
51
- yield f
52
- finally:
53
- try:
54
- os.remove(f)
55
- except OSError:
56
- pass
57
-
58
-
59
- @contextmanager
60
- def temptar(data=None, mode="w", suffix=".tar"):
61
- """
62
- Provide test cases with temporary synthesized .tar archives.
63
- """
64
- data = data or {}
65
- fn = tempfile.mkstemp(suffix=suffix)[1]
66
- with tarfile.TarFile.open(fn, mode=mode) as t:
67
- touched = {}
68
- for name, data in data.items():
69
- # Create directory hierarchy.
70
- # https://bugs.python.org/issue22208#msg225558
71
- if "/" in name and name not in touched:
72
- parts = os.path.dirname(name).split("/")
73
- for index in range(1, len(parts) + 1):
74
- info = tarfile.TarInfo("/".join(parts[:index]))
75
- info.type = tarfile.DIRTYPE
76
- t.addfile(info)
77
- touched[name] = True
78
-
79
- # Add file content.
80
- info = tarfile.TarInfo(name=name)
81
- info.size = len(data)
82
- t.addfile(info, BytesIO(data))
83
-
84
- try:
85
- yield fn
86
- finally:
87
- try:
88
- os.remove(fn)
89
- except OSError:
90
- pass
91
-
92
-
93
- @contextmanager
94
- def temptargz(data=None, mode="w", suffix=".tar.gz"):
95
- """
96
- Provide test cases with temporary synthesized .tar.gz archives.
97
- """
98
-
99
- with temptar(data=data, mode=mode) as tarname:
100
- fn = tempfile.mkstemp(suffix=suffix)[1]
101
- with open(tarname, "rb") as tar:
102
- cf = gzip.GzipFile(filename=fn, mode=mode)
103
- cf.write(tar.read())
104
- cf.close()
105
-
106
- try:
107
- yield fn
108
- finally:
109
- try:
110
- os.remove(fn)
111
- except OSError:
112
- pass
113
-
114
-
115
- @contextmanager
116
- def temptarbz2(data=None, mode="w", suffix=".tar.bz2"):
117
- """
118
- Provide test cases with temporary synthesized .tar.bz2 archives.
119
- """
120
-
121
- with temptar(data=data, mode=mode) as tarname:
122
- fn = tempfile.mkstemp(suffix=suffix)[1]
123
- with open(tarname, "rb") as tar:
124
- cf = bz2.BZ2File(filename=fn, mode=mode)
125
- cf.write(tar.read())
126
- cf.close()
127
-
128
- try:
129
- yield fn
130
- finally:
131
- try:
132
- os.remove(fn)
133
- except OSError:
134
- pass
135
-
136
-
137
- @contextmanager
138
- def temptarxz(data=None, mode="w", suffix=".tar.xz"):
139
- """
140
- Provide test cases with temporary synthesized .tar.xz archives.
141
- """
142
-
143
- with temptar(data=data, mode=mode) as tarname:
144
- fn = tempfile.mkstemp(suffix=suffix)[1]
145
- with open(tarname, "rb") as tar:
146
- cf = lzma.open(filename=fn, mode=mode, format=lzma.FORMAT_XZ)
147
- cf.write(tar.read())
148
- cf.close()
149
-
150
- try:
151
- yield fn
152
- finally:
153
- try:
154
- os.remove(fn)
155
- except OSError:
156
- pass
157
-
158
-
159
- class ArchiveTestScenario:
160
- """
161
- Describe a test scenario for any type of archive.
162
- """
163
-
164
- def __init__(self, protocol=None, provider=None, variant=None):
165
- # The filesystem protocol identifier. Any of "zip", "tar" or "libarchive".
166
- self.protocol = protocol
167
- # A contextmanager function to provide temporary synthesized archives.
168
- self.provider = provider
169
- # The filesystem protocol variant identifier. Any of "gz", "bz2" or "xz".
170
- self.variant = variant
171
-
172
-
173
- def pytest_generate_tests(metafunc):
174
- """
175
- Generate test scenario parametrization arguments with appropriate labels (idlist).
176
-
177
- On the one hand, this yields an appropriate output like::
178
-
179
- fsspec/implementations/tests/test_archive.py::TestArchive::test_empty[zip] PASSED # noqa
180
-
181
- On the other hand, it will support perfect test discovery, like::
182
-
183
- pytest fsspec -vvv -k "zip or tar or libarchive"
184
-
185
- https://docs.pytest.org/en/latest/example/parametrize.html#a-quick-port-of-testscenarios
186
- """
187
- idlist = []
188
- argnames = ["scenario"]
189
- argvalues = []
190
- for scenario in metafunc.cls.scenarios:
191
- scenario: ArchiveTestScenario = scenario
192
- label = scenario.protocol
193
- if scenario.variant:
194
- label += "-" + scenario.variant
195
- idlist.append(label)
196
- argvalues.append([scenario])
197
- metafunc.parametrize(argnames, argvalues, ids=idlist, scope="class")
198
-
199
-
200
- # Define test scenarios.
201
- scenario_zip = ArchiveTestScenario(protocol="zip", provider=tempzip)
202
- scenario_tar = ArchiveTestScenario(protocol="tar", provider=temptar)
203
- scenario_targz = ArchiveTestScenario(protocol="tar", provider=temptargz, variant="gz")
204
- scenario_tarbz2 = ArchiveTestScenario(
205
- protocol="tar", provider=temptarbz2, variant="bz2"
206
- )
207
- scenario_tarxz = ArchiveTestScenario(protocol="tar", provider=temptarxz, variant="xz")
208
- scenario_libarchive = ArchiveTestScenario(protocol="libarchive", provider=temparchive)
209
-
210
-
211
- class TestAnyArchive:
212
- """
213
- Validate that all filesystem adapter implementations for archive files
214
- will adhere to the same specification.
215
- """
216
-
217
- scenarios = [
218
- scenario_zip,
219
- scenario_tar,
220
- scenario_targz,
221
- scenario_tarbz2,
222
- scenario_tarxz,
223
- scenario_libarchive,
224
- ]
225
-
226
- def test_repr(self, scenario: ArchiveTestScenario):
227
- with scenario.provider() as archive:
228
- fs = fsspec.filesystem(scenario.protocol, fo=archive)
229
- assert repr(fs).startswith("<Archive-like object")
230
-
231
- def test_empty(self, scenario: ArchiveTestScenario):
232
- with scenario.provider() as archive:
233
- fs = fsspec.filesystem(scenario.protocol, fo=archive)
234
- assert fs.find("") == []
235
- assert fs.find("", withdirs=True) == []
236
- with pytest.raises(FileNotFoundError):
237
- fs.info("")
238
- assert fs.ls("") == []
239
-
240
- def test_glob(self, scenario: ArchiveTestScenario):
241
- with scenario.provider(archive_data) as archive:
242
- fs = fsspec.filesystem(scenario.protocol, fo=archive)
243
- assert fs.glob("*/*/*th") == ["deeply/nested/path"]
244
-
245
- def test_mapping(self, scenario: ArchiveTestScenario):
246
- with scenario.provider(archive_data) as archive:
247
- fs = fsspec.filesystem(scenario.protocol, fo=archive)
248
- m = fs.get_mapper()
249
- assert list(m) == ["a", "b", "deeply/nested/path"]
250
- assert m["b"] == archive_data["b"]
251
-
252
- def test_pickle(self, scenario: ArchiveTestScenario):
253
- with scenario.provider(archive_data) as archive:
254
- fs = fsspec.filesystem(scenario.protocol, fo=archive)
255
- fs2 = pickle.loads(pickle.dumps(fs))
256
- assert fs2.cat("b") == b"hello"
257
-
258
- def test_all_dirnames(self, scenario: ArchiveTestScenario):
259
- with scenario.provider(archive_data) as archive:
260
- fs = fsspec.filesystem(scenario.protocol, fo=archive)
261
-
262
- # fx are files, dx are a directories
263
- assert fs._all_dirnames([]) == set()
264
- assert fs._all_dirnames(["f1"]) == set()
265
- assert fs._all_dirnames(["f1", "f2"]) == set()
266
- assert fs._all_dirnames(["f1", "f2", "d1/f1"]) == {"d1"}
267
- assert fs._all_dirnames(["f1", "d1/f1", "d1/f2"]) == {"d1"}
268
- assert fs._all_dirnames(["f1", "d1/f1", "d2/f1"]) == {"d1", "d2"}
269
- assert fs._all_dirnames(["d1/d1/d1/f1"]) == {"d1", "d1/d1", "d1/d1/d1"}
270
-
271
- def test_ls(self, scenario: ArchiveTestScenario):
272
- with scenario.provider(archive_data) as archive:
273
- fs = fsspec.filesystem(scenario.protocol, fo=archive)
274
-
275
- assert fs.ls("", detail=False) == ["a", "b", "deeply"]
276
- assert fs.ls("/") == fs.ls("")
277
-
278
- assert fs.ls("deeply", detail=False) == ["deeply/nested"]
279
- assert fs.ls("deeply/") == fs.ls("deeply")
280
-
281
- assert fs.ls("deeply/nested", detail=False) == ["deeply/nested/path"]
282
- assert fs.ls("deeply/nested/") == fs.ls("deeply/nested")
283
-
284
- def test_find(self, scenario: ArchiveTestScenario):
285
- with scenario.provider(archive_data) as archive:
286
- fs = fsspec.filesystem(scenario.protocol, fo=archive)
287
-
288
- assert fs.find("") == ["a", "b", "deeply/nested/path"]
289
- assert fs.find("", withdirs=True) == [
290
- "a",
291
- "b",
292
- "deeply",
293
- "deeply/nested",
294
- "deeply/nested/path",
295
- ]
296
-
297
- assert fs.find("deeply") == ["deeply/nested/path"]
298
- assert fs.find("deeply/") == fs.find("deeply")
299
-
300
- @pytest.mark.parametrize("topdown", [True, False])
301
- @pytest.mark.parametrize("prune_nested", [True, False])
302
- def test_walk(self, scenario: ArchiveTestScenario, topdown, prune_nested):
303
- with scenario.provider(archive_data) as archive:
304
- fs = fsspec.filesystem(scenario.protocol, fo=archive)
305
- expected = [
306
- # (dirname, list of subdirs, list of files)
307
- ("", ["deeply"], ["a", "b"]),
308
- ("deeply", ["nested"], []),
309
- ]
310
- if not topdown or not prune_nested:
311
- expected.append(("deeply/nested", [], ["path"]))
312
- if not topdown:
313
- expected.reverse()
314
-
315
- result = []
316
- for path, dirs, files in fs.walk("", topdown=topdown):
317
- result.append((path, dirs.copy(), files))
318
- # Bypass the "nested" dir
319
- if prune_nested and "nested" in dirs:
320
- dirs.remove("nested")
321
-
322
- # prior py3.10 zip() does not support strict=True, we need
323
- # a manual len check here
324
- assert len(result) == len(expected)
325
- for lhs, rhs in zip(result, expected):
326
- assert lhs[0] == rhs[0]
327
- assert sorted(lhs[1]) == sorted(rhs[1])
328
- assert sorted(lhs[2]) == sorted(rhs[2])
329
-
330
- def test_info(self, scenario: ArchiveTestScenario):
331
- # https://github.com/Suor/funcy/blob/1.15/funcy/colls.py#L243-L245
332
- def project(mapping, keys):
333
- """Leaves only given keys in mapping."""
334
- return {k: mapping[k] for k in keys if k in mapping}
335
-
336
- with scenario.provider(archive_data) as archive:
337
- fs = fsspec.filesystem(scenario.protocol, fo=archive)
338
-
339
- with pytest.raises(FileNotFoundError):
340
- fs.info("i-do-not-exist")
341
-
342
- # Iterate over all directories.
343
- for d in fs._all_dirnames(archive_data.keys()):
344
- lhs = project(fs.info(d), ["name", "size", "type"])
345
- expected = {"name": f"{d}", "size": 0, "type": "directory"}
346
- assert lhs == expected
347
-
348
- # Iterate over all files.
349
- for f, v in archive_data.items():
350
- lhs = fs.info(f)
351
- assert lhs["name"] == f
352
- assert lhs["size"] == len(v)
353
- assert lhs["type"] == "file"
354
-
355
- @pytest.mark.parametrize("scale", [128, 512, 4096])
356
- def test_isdir_isfile(self, scenario: ArchiveTestScenario, scale: int):
357
- def make_nested_dir(i):
358
- x = f"{i}"
359
- table = x.maketrans("0123456789", "ABCDEFGHIJ")
360
- return "/".join(x.translate(table))
361
-
362
- scaled_data = {f"{make_nested_dir(i)}/{i}": b"" for i in range(1, scale + 1)}
363
- with scenario.provider(scaled_data) as archive:
364
- fs = fsspec.filesystem(scenario.protocol, fo=archive)
365
-
366
- lhs_dirs, lhs_files = (
367
- fs._all_dirnames(scaled_data.keys()),
368
- scaled_data.keys(),
369
- )
370
-
371
- # Warm-up the Cache, this is done in both cases anyways...
372
- fs._get_dirs()
373
-
374
- entries = lhs_files | lhs_dirs
375
-
376
- assert lhs_dirs == {e for e in entries if fs.isdir(e)}
377
- assert lhs_files == {e for e in entries if fs.isfile(e)}
378
-
379
- def test_read_empty_file(self, scenario: ArchiveTestScenario):
380
- with scenario.provider(archive_data) as archive:
381
- fs = fsspec.filesystem(scenario.protocol, fo=archive)
382
- assert fs.open("a").read() == b""
@@ -1,259 +0,0 @@
1
- import secrets
2
-
3
- import pytest
4
-
5
- pyarrow_fs = pytest.importorskip("pyarrow.fs")
6
- FileSystem = pyarrow_fs.FileSystem
7
-
8
- from fsspec.implementations.arrow import ArrowFSWrapper, HadoopFileSystem # noqa
9
-
10
-
11
- @pytest.fixture(scope="function")
12
- def fs():
13
- fs, _ = FileSystem.from_uri("mock://")
14
- return ArrowFSWrapper(fs)
15
-
16
-
17
- @pytest.fixture(scope="function", params=[False, True])
18
- def remote_dir(fs, request):
19
- directory = secrets.token_hex(16)
20
- fs.makedirs(directory)
21
- yield ("hdfs://" if request.param else "/") + directory
22
- fs.rm(directory, recursive=True)
23
-
24
-
25
- def test_protocol():
26
- fs, _ = FileSystem.from_uri("mock://")
27
- fss = ArrowFSWrapper(fs)
28
- assert fss.protocol == "mock"
29
-
30
-
31
- def strip_keys(original_entry):
32
- entry = original_entry.copy()
33
- entry.pop("mtime")
34
- return entry
35
-
36
-
37
- def test_strip(fs):
38
- assert fs._strip_protocol("/a/file") == "/a/file"
39
- assert fs._strip_protocol("hdfs:///a/file") == "/a/file"
40
- assert fs._strip_protocol("hdfs://1.1.1.1/a/file") == "/a/file"
41
- assert fs._strip_protocol("hdfs://1.1.1.1:8888/a/file") == "/a/file"
42
-
43
-
44
- def test_info(fs, remote_dir):
45
- fs.touch(remote_dir + "/a.txt")
46
- remote_dir_strip_protocol = fs._strip_protocol(remote_dir)
47
- details = fs.info(remote_dir + "/a.txt")
48
- assert details["type"] == "file"
49
- assert details["name"] == remote_dir_strip_protocol + "/a.txt"
50
- assert details["size"] == 0
51
-
52
- fs.mkdir(remote_dir + "/dir")
53
- details = fs.info(remote_dir + "/dir")
54
- assert details["type"] == "directory"
55
- assert details["name"] == remote_dir_strip_protocol + "/dir"
56
-
57
- details = fs.info(remote_dir + "/dir/")
58
- assert details["name"] == remote_dir_strip_protocol + "/dir/"
59
-
60
-
61
- def test_move(fs, remote_dir):
62
- fs.touch(remote_dir + "/a.txt")
63
- initial_info = fs.info(remote_dir + "/a.txt")
64
-
65
- fs.move(remote_dir + "/a.txt", remote_dir + "/b.txt")
66
- secondary_info = fs.info(remote_dir + "/b.txt")
67
-
68
- assert not fs.exists(remote_dir + "/a.txt")
69
- assert fs.exists(remote_dir + "/b.txt")
70
-
71
- initial_info.pop("name")
72
- secondary_info.pop("name")
73
- assert initial_info == secondary_info
74
-
75
-
76
- def test_move_recursive(fs, remote_dir):
77
- src = remote_dir + "/src"
78
- dest = remote_dir + "/dest"
79
-
80
- assert fs.isdir(src) is False
81
- fs.mkdir(src)
82
- assert fs.isdir(src)
83
-
84
- fs.touch(src + "/a.txt")
85
- fs.mkdir(src + "/b")
86
- fs.touch(src + "/b/c.txt")
87
- fs.move(src, dest, recursive=True)
88
-
89
- assert fs.isdir(src) is False
90
- assert not fs.exists(src)
91
-
92
- assert fs.isdir(dest)
93
- assert fs.exists(dest)
94
- assert fs.cat(dest + "/b/c.txt") == fs.cat(dest + "/a.txt") == b""
95
-
96
-
97
- def test_copy(fs, remote_dir):
98
- fs.touch(remote_dir + "/a.txt")
99
- initial_info = fs.info(remote_dir + "/a.txt")
100
-
101
- fs.copy(remote_dir + "/a.txt", remote_dir + "/b.txt")
102
- secondary_info = fs.info(remote_dir + "/b.txt")
103
-
104
- assert fs.exists(remote_dir + "/a.txt")
105
- assert fs.exists(remote_dir + "/b.txt")
106
-
107
- initial_info.pop("name")
108
- secondary_info.pop("name")
109
- assert strip_keys(initial_info) == strip_keys(secondary_info)
110
-
111
-
112
- def test_rm(fs, remote_dir):
113
- fs.touch(remote_dir + "/a.txt")
114
- fs.rm(remote_dir + "/a.txt", recursive=True)
115
- assert not fs.exists(remote_dir + "/a.txt")
116
-
117
- fs.mkdir(remote_dir + "/dir")
118
- fs.rm(remote_dir + "/dir", recursive=True)
119
- assert not fs.exists(remote_dir + "/dir")
120
-
121
- fs.mkdir(remote_dir + "/dir")
122
- fs.touch(remote_dir + "/dir/a")
123
- fs.touch(remote_dir + "/dir/b")
124
- fs.mkdir(remote_dir + "/dir/c/")
125
- fs.touch(remote_dir + "/dir/c/a")
126
- fs.rm(remote_dir + "/dir", recursive=True)
127
- assert not fs.exists(remote_dir + "/dir")
128
-
129
-
130
- def test_ls(fs, remote_dir):
131
- if remote_dir != "/":
132
- remote_dir = remote_dir + "/"
133
- remote_dir_strip_protocol = fs._strip_protocol(remote_dir)
134
- fs.mkdir(remote_dir + "dir/")
135
- files = set()
136
- for no in range(8):
137
- file = remote_dir + f"dir/test_{no}"
138
- # we also want to make sure `fs.touch` works with protocol
139
- fs.touch(file)
140
- files.add(remote_dir_strip_protocol + f"dir/test_{no}")
141
-
142
- assert set(fs.ls(remote_dir + "dir/")) == files
143
-
144
- dirs = fs.ls(remote_dir + "dir/", detail=True)
145
- expected = [fs.info(file) for file in files]
146
-
147
- by_name = lambda details: details["name"]
148
- dirs.sort(key=by_name)
149
- expected.sort(key=by_name)
150
-
151
- assert dirs == expected
152
-
153
-
154
- def test_mkdir(fs, remote_dir):
155
- if remote_dir != "/":
156
- remote_dir = remote_dir + "/"
157
- fs.mkdir(remote_dir + "dir/")
158
- assert fs.isdir(remote_dir + "dir/")
159
- assert len(fs.ls(remote_dir + "dir/")) == 0
160
-
161
- fs.mkdir(remote_dir + "dir/sub", create_parents=False)
162
- assert fs.isdir(remote_dir + "dir/sub")
163
-
164
-
165
- def test_makedirs(fs, remote_dir):
166
- fs.makedirs(remote_dir + "dir/a/b/c/")
167
- assert fs.isdir(remote_dir + "dir/a/b/c/")
168
- assert fs.isdir(remote_dir + "dir/a/b/")
169
- assert fs.isdir(remote_dir + "dir/a/")
170
-
171
- fs.makedirs(remote_dir + "dir/a/b/c/", exist_ok=True)
172
-
173
-
174
- def test_exceptions(fs, remote_dir):
175
- with pytest.raises(FileNotFoundError):
176
- with fs.open(remote_dir + "/a.txt"):
177
- ...
178
-
179
- with pytest.raises(FileNotFoundError):
180
- fs.copy(remote_dir + "/u.txt", remote_dir + "/y.txt")
181
-
182
-
183
- def test_open_rw(fs, remote_dir):
184
- data = b"dvc.org"
185
-
186
- with fs.open(remote_dir + "/a.txt", "wb") as stream:
187
- stream.write(data)
188
-
189
- with fs.open(remote_dir + "/a.txt") as stream:
190
- assert stream.read() == data
191
-
192
-
193
- def test_open_rw_flush(fs, remote_dir):
194
- data = b"dvc.org"
195
-
196
- with fs.open(remote_dir + "/b.txt", "wb") as stream:
197
- for _ in range(200):
198
- stream.write(data)
199
- stream.write(data)
200
- stream.flush()
201
-
202
- with fs.open(remote_dir + "/b.txt", "rb") as stream:
203
- assert stream.read() == data * 400
204
-
205
-
206
- def test_open_append(fs, remote_dir):
207
- data = b"dvc.org"
208
-
209
- with fs.open(remote_dir + "/a.txt", "wb") as stream:
210
- stream.write(data)
211
-
212
- with fs.open(remote_dir + "/a.txt", "ab") as stream:
213
- stream.write(data)
214
-
215
- with fs.open(remote_dir + "/a.txt") as stream:
216
- assert stream.read() == 2 * data
217
-
218
-
219
- def test_open_seekable(fs, remote_dir):
220
- data = b"dvc.org"
221
-
222
- with fs.open(remote_dir + "/a.txt", "wb") as stream:
223
- stream.write(data)
224
-
225
- with fs.open(remote_dir + "/a.txt", "rb", seekable=True) as file:
226
- file.seek(2)
227
- assert file.read() == data[2:]
228
-
229
-
230
- def test_seekable(fs, remote_dir):
231
- data = b"dvc.org"
232
-
233
- with fs.open(remote_dir + "/a.txt", "wb") as stream:
234
- stream.write(data)
235
-
236
- for seekable in [True, False]:
237
- with fs.open(remote_dir + "/a.txt", "rb", seekable=seekable) as file:
238
- assert file.seekable() == seekable
239
- assert file.read() == data
240
-
241
- with fs.open(remote_dir + "/a.txt", "rb", seekable=False) as file:
242
- with pytest.raises(OSError):
243
- file.seek(5)
244
-
245
-
246
- def test_get_kwargs_from_urls_hadoop_fs():
247
- kwargs = HadoopFileSystem._get_kwargs_from_urls(
248
- "hdfs://user@localhost:8020/?replication=2"
249
- )
250
- assert kwargs["user"] == "user"
251
- assert kwargs["host"] == "localhost"
252
- assert kwargs["port"] == 8020
253
- assert kwargs["replication"] == 2
254
-
255
- kwargs = HadoopFileSystem._get_kwargs_from_urls("hdfs://user@localhost:8020/")
256
- assert kwargs["user"] == "user"
257
- assert kwargs["host"] == "localhost"
258
- assert kwargs["port"] == 8020
259
- assert "replication" not in kwargs