fsspec 2024.3.1__py3-none-any.whl → 2024.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/__init__.py +2 -3
- fsspec/_version.py +14 -19
- fsspec/caching.py +83 -14
- fsspec/compression.py +1 -0
- fsspec/core.py +32 -8
- fsspec/exceptions.py +1 -0
- fsspec/generic.py +1 -1
- fsspec/gui.py +1 -1
- fsspec/implementations/arrow.py +0 -2
- fsspec/implementations/cache_mapper.py +1 -2
- fsspec/implementations/cache_metadata.py +7 -7
- fsspec/implementations/dirfs.py +2 -2
- fsspec/implementations/http.py +9 -9
- fsspec/implementations/local.py +78 -45
- fsspec/implementations/memory.py +9 -0
- fsspec/implementations/smb.py +3 -1
- fsspec/implementations/tests/__init__.py +0 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_file_listing.yaml +112 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_mkdir.yaml +582 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_pyarrow_non_partitioned.yaml +873 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range.yaml +458 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_read_range_chunked.yaml +1355 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_and_read.yaml +795 -0
- fsspec/implementations/tests/cassettes/test_dbfs/test_dbfs_write_pyarrow_non_partitioned.yaml +613 -0
- fsspec/implementations/tests/conftest.py +39 -0
- fsspec/implementations/tests/local/__init__.py +0 -0
- fsspec/implementations/tests/local/local_fixtures.py +18 -0
- fsspec/implementations/tests/local/local_test.py +14 -0
- fsspec/implementations/tests/memory/__init__.py +0 -0
- fsspec/implementations/tests/memory/memory_fixtures.py +27 -0
- fsspec/implementations/tests/memory/memory_test.py +14 -0
- fsspec/implementations/tests/out.zip +0 -0
- fsspec/implementations/tests/test_archive.py +382 -0
- fsspec/implementations/tests/test_arrow.py +259 -0
- fsspec/implementations/tests/test_cached.py +1306 -0
- fsspec/implementations/tests/test_common.py +35 -0
- fsspec/implementations/tests/test_dask.py +29 -0
- fsspec/implementations/tests/test_data.py +20 -0
- fsspec/implementations/tests/test_dbfs.py +268 -0
- fsspec/implementations/tests/test_dirfs.py +588 -0
- fsspec/implementations/tests/test_ftp.py +178 -0
- fsspec/implementations/tests/test_git.py +76 -0
- fsspec/implementations/tests/test_http.py +577 -0
- fsspec/implementations/tests/test_jupyter.py +57 -0
- fsspec/implementations/tests/test_libarchive.py +33 -0
- fsspec/implementations/tests/test_local.py +1285 -0
- fsspec/implementations/tests/test_memory.py +382 -0
- fsspec/implementations/tests/test_reference.py +720 -0
- fsspec/implementations/tests/test_sftp.py +233 -0
- fsspec/implementations/tests/test_smb.py +139 -0
- fsspec/implementations/tests/test_tar.py +243 -0
- fsspec/implementations/tests/test_webhdfs.py +197 -0
- fsspec/implementations/tests/test_zip.py +134 -0
- fsspec/implementations/webhdfs.py +1 -3
- fsspec/parquet.py +0 -8
- fsspec/registry.py +4 -0
- fsspec/spec.py +21 -4
- fsspec/tests/__init__.py +0 -0
- fsspec/tests/abstract/mv.py +57 -0
- fsspec/tests/conftest.py +188 -0
- fsspec/tests/data/listing.html +1 -0
- fsspec/tests/test_api.py +498 -0
- fsspec/tests/test_async.py +230 -0
- fsspec/tests/test_caches.py +255 -0
- fsspec/tests/test_callbacks.py +89 -0
- fsspec/tests/test_compression.py +164 -0
- fsspec/tests/test_config.py +129 -0
- fsspec/tests/test_core.py +466 -0
- fsspec/tests/test_downstream.py +40 -0
- fsspec/tests/test_file.py +200 -0
- fsspec/tests/test_fuse.py +147 -0
- fsspec/tests/test_generic.py +90 -0
- fsspec/tests/test_gui.py +23 -0
- fsspec/tests/test_mapping.py +228 -0
- fsspec/tests/test_parquet.py +140 -0
- fsspec/tests/test_registry.py +134 -0
- fsspec/tests/test_spec.py +1167 -0
- fsspec/tests/test_utils.py +478 -0
- fsspec/utils.py +0 -2
- fsspec-2024.5.0.dist-info/METADATA +273 -0
- fsspec-2024.5.0.dist-info/RECORD +111 -0
- {fsspec-2024.3.1.dist-info → fsspec-2024.5.0.dist-info}/WHEEL +1 -2
- fsspec-2024.3.1.dist-info/METADATA +0 -167
- fsspec-2024.3.1.dist-info/RECORD +0 -54
- fsspec-2024.3.1.dist-info/top_level.txt +0 -1
- {fsspec-2024.3.1.dist-info → fsspec-2024.5.0.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,478 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path, PurePath
|
|
4
|
+
from unittest.mock import Mock
|
|
5
|
+
|
|
6
|
+
import pytest
|
|
7
|
+
|
|
8
|
+
import fsspec.utils
|
|
9
|
+
from fsspec.utils import (
|
|
10
|
+
can_be_local,
|
|
11
|
+
common_prefix,
|
|
12
|
+
get_protocol,
|
|
13
|
+
infer_storage_options,
|
|
14
|
+
merge_offset_ranges,
|
|
15
|
+
mirror_from,
|
|
16
|
+
other_paths,
|
|
17
|
+
read_block,
|
|
18
|
+
seek_delimiter,
|
|
19
|
+
setup_logging,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
WIN = sys.platform.startswith("win")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_read_block():
|
|
26
|
+
delimiter = b"\n"
|
|
27
|
+
data = delimiter.join([b"123", b"456", b"789"])
|
|
28
|
+
f = io.BytesIO(data)
|
|
29
|
+
|
|
30
|
+
assert read_block(f, 1, 2) == b"23"
|
|
31
|
+
assert read_block(f, 0, 1, delimiter=b"\n") == b"123\n"
|
|
32
|
+
assert read_block(f, 0, 2, delimiter=b"\n") == b"123\n"
|
|
33
|
+
assert read_block(f, 0, 3, delimiter=b"\n") == b"123\n"
|
|
34
|
+
assert read_block(f, 0, 5, delimiter=b"\n") == b"123\n456\n"
|
|
35
|
+
assert read_block(f, 0, 8, delimiter=b"\n") == b"123\n456\n789"
|
|
36
|
+
assert read_block(f, 0, 100, delimiter=b"\n") == b"123\n456\n789"
|
|
37
|
+
assert read_block(f, 1, 1, delimiter=b"\n") == b""
|
|
38
|
+
assert read_block(f, 1, 5, delimiter=b"\n") == b"456\n"
|
|
39
|
+
assert read_block(f, 1, 8, delimiter=b"\n") == b"456\n789"
|
|
40
|
+
|
|
41
|
+
for ols in [[(0, 3), (3, 3), (6, 3), (9, 2)], [(0, 4), (4, 4), (8, 4)]]:
|
|
42
|
+
out = [read_block(f, o, l, b"\n") for o, l in ols]
|
|
43
|
+
assert b"".join(filter(None, out)) == data
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_read_block_split_before():
|
|
47
|
+
"""Test start/middle/end cases of split_before.""" # noqa: I
|
|
48
|
+
d = (
|
|
49
|
+
"#header" + "".join(">foo{i}\nFOOBAR{i}\n".format(i=i) for i in range(100000))
|
|
50
|
+
).encode()
|
|
51
|
+
|
|
52
|
+
# Read single record at beginning.
|
|
53
|
+
# All reads include beginning of file and read through termination of
|
|
54
|
+
# delimited record.
|
|
55
|
+
assert read_block(io.BytesIO(d), 0, 10, delimiter=b"\n") == b"#header>foo0\n"
|
|
56
|
+
assert (
|
|
57
|
+
read_block(io.BytesIO(d), 0, 10, delimiter=b"\n", split_before=True)
|
|
58
|
+
== b"#header>foo0"
|
|
59
|
+
)
|
|
60
|
+
assert (
|
|
61
|
+
read_block(io.BytesIO(d), 0, 10, delimiter=b">") == b"#header>foo0\nFOOBAR0\n>"
|
|
62
|
+
)
|
|
63
|
+
assert (
|
|
64
|
+
read_block(io.BytesIO(d), 0, 10, delimiter=b">", split_before=True)
|
|
65
|
+
== b"#header>foo0\nFOOBAR0\n"
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
# Read multiple records at beginning.
|
|
69
|
+
# All reads include beginning of file and read through termination of
|
|
70
|
+
# delimited record.
|
|
71
|
+
assert (
|
|
72
|
+
read_block(io.BytesIO(d), 0, 27, delimiter=b"\n")
|
|
73
|
+
== b"#header>foo0\nFOOBAR0\n>foo1\nFOOBAR1\n"
|
|
74
|
+
)
|
|
75
|
+
assert (
|
|
76
|
+
read_block(io.BytesIO(d), 0, 27, delimiter=b"\n", split_before=True)
|
|
77
|
+
== b"#header>foo0\nFOOBAR0\n>foo1\nFOOBAR1"
|
|
78
|
+
)
|
|
79
|
+
assert (
|
|
80
|
+
read_block(io.BytesIO(d), 0, 27, delimiter=b">")
|
|
81
|
+
== b"#header>foo0\nFOOBAR0\n>foo1\nFOOBAR1\n>"
|
|
82
|
+
)
|
|
83
|
+
assert (
|
|
84
|
+
read_block(io.BytesIO(d), 0, 27, delimiter=b">", split_before=True)
|
|
85
|
+
== b"#header>foo0\nFOOBAR0\n>foo1\nFOOBAR1\n"
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
# Read with offset spanning into next record, splits on either side of delimiter.
|
|
89
|
+
# Read not spanning the full record returns nothing.
|
|
90
|
+
assert read_block(io.BytesIO(d), 10, 3, delimiter=b"\n") == b"FOOBAR0\n"
|
|
91
|
+
assert (
|
|
92
|
+
read_block(io.BytesIO(d), 10, 3, delimiter=b"\n", split_before=True)
|
|
93
|
+
== b"\nFOOBAR0"
|
|
94
|
+
)
|
|
95
|
+
assert read_block(io.BytesIO(d), 10, 3, delimiter=b">") == b""
|
|
96
|
+
assert read_block(io.BytesIO(d), 10, 3, delimiter=b">", split_before=True) == b""
|
|
97
|
+
|
|
98
|
+
# Read with offset spanning multiple records, splits on either side of delimiter
|
|
99
|
+
assert (
|
|
100
|
+
read_block(io.BytesIO(d), 10, 20, delimiter=b"\n")
|
|
101
|
+
== b"FOOBAR0\n>foo1\nFOOBAR1\n"
|
|
102
|
+
)
|
|
103
|
+
assert (
|
|
104
|
+
read_block(io.BytesIO(d), 10, 20, delimiter=b"\n", split_before=True)
|
|
105
|
+
== b"\nFOOBAR0\n>foo1\nFOOBAR1"
|
|
106
|
+
)
|
|
107
|
+
assert read_block(io.BytesIO(d), 10, 20, delimiter=b">") == b"foo1\nFOOBAR1\n>"
|
|
108
|
+
assert (
|
|
109
|
+
read_block(io.BytesIO(d), 10, 20, delimiter=b">", split_before=True)
|
|
110
|
+
== b">foo1\nFOOBAR1\n"
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
# Read record at end, all records read to end
|
|
114
|
+
|
|
115
|
+
tlen = len(d)
|
|
116
|
+
|
|
117
|
+
assert (
|
|
118
|
+
read_block(io.BytesIO(d), tlen - 30, 35, delimiter=b"\n")
|
|
119
|
+
== b">foo99999\nFOOBAR99999\n"
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
assert (
|
|
123
|
+
read_block(io.BytesIO(d), tlen - 30, 35, delimiter=b"\n", split_before=True)
|
|
124
|
+
== b"\n>foo99999\nFOOBAR99999\n"
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
assert (
|
|
128
|
+
read_block(io.BytesIO(d), tlen - 30, 35, delimiter=b">")
|
|
129
|
+
== b"foo99999\nFOOBAR99999\n"
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
assert (
|
|
133
|
+
read_block(io.BytesIO(d), tlen - 30, 35, delimiter=b">", split_before=True)
|
|
134
|
+
== b">foo99999\nFOOBAR99999\n"
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def test_seek_delimiter_endline():
|
|
139
|
+
f = io.BytesIO(b"123\n456\n789")
|
|
140
|
+
|
|
141
|
+
# if at zero, stay at zero
|
|
142
|
+
seek_delimiter(f, b"\n", 5)
|
|
143
|
+
assert f.tell() == 0
|
|
144
|
+
|
|
145
|
+
# choose the first block
|
|
146
|
+
for bs in [1, 5, 100]:
|
|
147
|
+
f.seek(1)
|
|
148
|
+
seek_delimiter(f, b"\n", blocksize=bs)
|
|
149
|
+
assert f.tell() == 4
|
|
150
|
+
|
|
151
|
+
# handle long delimiters well, even with short blocksizes
|
|
152
|
+
f = io.BytesIO(b"123abc456abc789")
|
|
153
|
+
for bs in [1, 2, 3, 4, 5, 6, 10]:
|
|
154
|
+
f.seek(1)
|
|
155
|
+
seek_delimiter(f, b"abc", blocksize=bs)
|
|
156
|
+
assert f.tell() == 6
|
|
157
|
+
|
|
158
|
+
# End at the end
|
|
159
|
+
f = io.BytesIO(b"123\n456")
|
|
160
|
+
f.seek(5)
|
|
161
|
+
seek_delimiter(f, b"\n", 5)
|
|
162
|
+
assert f.tell() == 7
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def test_infer_options():
|
|
166
|
+
so = infer_storage_options("/mnt/datasets/test.csv")
|
|
167
|
+
assert so.pop("protocol") == "file"
|
|
168
|
+
assert so.pop("path") == "/mnt/datasets/test.csv"
|
|
169
|
+
assert not so
|
|
170
|
+
|
|
171
|
+
assert infer_storage_options("./test.csv")["path"] == "./test.csv"
|
|
172
|
+
assert infer_storage_options("../test.csv")["path"] == "../test.csv"
|
|
173
|
+
|
|
174
|
+
so = infer_storage_options("C:\\test.csv")
|
|
175
|
+
assert so.pop("protocol") == "file"
|
|
176
|
+
assert so.pop("path") == "C:\\test.csv"
|
|
177
|
+
assert not so
|
|
178
|
+
|
|
179
|
+
assert infer_storage_options("d:\\test.csv")["path"] == "d:\\test.csv"
|
|
180
|
+
assert infer_storage_options("\\test.csv")["path"] == "\\test.csv"
|
|
181
|
+
assert infer_storage_options(".\\test.csv")["path"] == ".\\test.csv"
|
|
182
|
+
assert infer_storage_options("test.csv")["path"] == "test.csv"
|
|
183
|
+
|
|
184
|
+
so = infer_storage_options(
|
|
185
|
+
"hdfs://username:pwd@Node:123/mnt/datasets/test.csv?q=1#fragm",
|
|
186
|
+
inherit_storage_options={"extra": "value"},
|
|
187
|
+
)
|
|
188
|
+
assert so.pop("protocol") == "hdfs"
|
|
189
|
+
assert so.pop("username") == "username"
|
|
190
|
+
assert so.pop("password") == "pwd"
|
|
191
|
+
assert so.pop("host") == "Node"
|
|
192
|
+
assert so.pop("port") == 123
|
|
193
|
+
assert so.pop("path") == "/mnt/datasets/test.csv#fragm"
|
|
194
|
+
assert so.pop("url_query") == "q=1"
|
|
195
|
+
assert so.pop("url_fragment") == "fragm"
|
|
196
|
+
assert so.pop("extra") == "value"
|
|
197
|
+
assert not so
|
|
198
|
+
|
|
199
|
+
so = infer_storage_options("hdfs://User-name@Node-name.com/mnt/datasets/test.csv")
|
|
200
|
+
assert so.pop("username") == "User-name"
|
|
201
|
+
assert so.pop("host") == "Node-name.com"
|
|
202
|
+
|
|
203
|
+
u = "http://127.0.0.1:8080/test.csv"
|
|
204
|
+
assert infer_storage_options(u) == {"protocol": "http", "path": u}
|
|
205
|
+
|
|
206
|
+
# For s3 and gcs the netloc is actually the bucket name, so we want to
|
|
207
|
+
# include it in the path. Test that:
|
|
208
|
+
# - Parsing doesn't lowercase the bucket
|
|
209
|
+
# - The bucket is included in path
|
|
210
|
+
for protocol in ["s3", "s3a", "gcs", "gs"]:
|
|
211
|
+
options = infer_storage_options(f"{protocol}://Bucket-name.com/test.csv")
|
|
212
|
+
assert options["path"] == "Bucket-name.com/test.csv"
|
|
213
|
+
|
|
214
|
+
with pytest.raises(KeyError):
|
|
215
|
+
infer_storage_options("file:///bucket/file.csv", {"path": "collide"})
|
|
216
|
+
with pytest.raises(KeyError):
|
|
217
|
+
infer_storage_options("hdfs:///bucket/file.csv", {"protocol": "collide"})
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def test_infer_simple():
|
|
221
|
+
out = infer_storage_options("//mnt/datasets/test.csv")
|
|
222
|
+
assert out["protocol"] == "file"
|
|
223
|
+
assert out["path"] == "//mnt/datasets/test.csv"
|
|
224
|
+
assert out.get("host", None) is None
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
@pytest.mark.parametrize(
|
|
228
|
+
"urlpath, expected_path",
|
|
229
|
+
(
|
|
230
|
+
(r"c:\foo\bar", r"c:\foo\bar"),
|
|
231
|
+
(r"C:\\foo\bar", r"C:\\foo\bar"),
|
|
232
|
+
(r"c:/foo/bar", r"c:/foo/bar"),
|
|
233
|
+
(r"file:///c|\foo\bar", r"c:\foo\bar"),
|
|
234
|
+
(r"file:///C|/foo/bar", r"C:/foo/bar"),
|
|
235
|
+
(r"file:///C:/foo/bar", r"C:/foo/bar"),
|
|
236
|
+
),
|
|
237
|
+
)
|
|
238
|
+
def test_infer_storage_options_c(urlpath, expected_path):
|
|
239
|
+
so = infer_storage_options(urlpath)
|
|
240
|
+
assert so["protocol"] == "file"
|
|
241
|
+
assert so["path"] == expected_path
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
@pytest.mark.parametrize(
|
|
245
|
+
"paths, out",
|
|
246
|
+
(
|
|
247
|
+
(["/more/dir/", "/more/dir/two", "/more/one", "/more/three"], "/more"),
|
|
248
|
+
(["/", "", "/"], ""),
|
|
249
|
+
(["/", "/"], "/"),
|
|
250
|
+
(["/more/", "/"], ""),
|
|
251
|
+
(["/more/", "/more"], "/more"),
|
|
252
|
+
(["more/dir/", "more/dir/two", "more/one", "more/three"], "more"),
|
|
253
|
+
),
|
|
254
|
+
)
|
|
255
|
+
def test_common_prefix(paths, out):
|
|
256
|
+
assert common_prefix(paths) == out
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
@pytest.mark.parametrize(
|
|
260
|
+
"paths, other, exists, expected",
|
|
261
|
+
(
|
|
262
|
+
(["/path1"], "/path2", False, ["/path2"]),
|
|
263
|
+
(["/path1"], "/path2", True, ["/path2/path1"]),
|
|
264
|
+
(["/path1"], "/path2", False, ["/path2"]),
|
|
265
|
+
(["/path1"], "/path2/", True, ["/path2/path1"]),
|
|
266
|
+
(["/path1"], ["/path2"], False, ["/path2"]),
|
|
267
|
+
(["/path1"], ["/path2"], True, ["/path2"]),
|
|
268
|
+
(["/path1", "/path2"], "/path2", False, ["/path2/path1", "/path2/path2"]),
|
|
269
|
+
(["/path1", "/path2"], "/path2", True, ["/path2/path1", "/path2/path2"]),
|
|
270
|
+
(
|
|
271
|
+
["/more/path1", "/more/path2"],
|
|
272
|
+
"/path2",
|
|
273
|
+
False,
|
|
274
|
+
["/path2/path1", "/path2/path2"],
|
|
275
|
+
),
|
|
276
|
+
(
|
|
277
|
+
["/more/path1", "/more/path2"],
|
|
278
|
+
"/path2",
|
|
279
|
+
True,
|
|
280
|
+
["/path2/more/path1", "/path2/more/path2"],
|
|
281
|
+
),
|
|
282
|
+
(
|
|
283
|
+
["/more/path1", "/more/path2"],
|
|
284
|
+
"/path2",
|
|
285
|
+
False,
|
|
286
|
+
["/path2/path1", "/path2/path2"],
|
|
287
|
+
),
|
|
288
|
+
(
|
|
289
|
+
["/more/path1", "/more/path2"],
|
|
290
|
+
"/path2",
|
|
291
|
+
True,
|
|
292
|
+
["/path2/more/path1", "/path2/more/path2"],
|
|
293
|
+
),
|
|
294
|
+
(
|
|
295
|
+
["/more/path1", "/more/path2"],
|
|
296
|
+
"/path2/",
|
|
297
|
+
False,
|
|
298
|
+
["/path2/path1", "/path2/path2"],
|
|
299
|
+
),
|
|
300
|
+
(
|
|
301
|
+
["/more/path1", "/more/path2"],
|
|
302
|
+
"/path2/",
|
|
303
|
+
True,
|
|
304
|
+
["/path2/more/path1", "/path2/more/path2"],
|
|
305
|
+
),
|
|
306
|
+
(
|
|
307
|
+
["/more/path1", "/diff/path2"],
|
|
308
|
+
"/path2/",
|
|
309
|
+
False,
|
|
310
|
+
["/path2/more/path1", "/path2/diff/path2"],
|
|
311
|
+
),
|
|
312
|
+
(
|
|
313
|
+
["/more/path1", "/diff/path2"],
|
|
314
|
+
"/path2/",
|
|
315
|
+
True,
|
|
316
|
+
["/path2/more/path1", "/path2/diff/path2"],
|
|
317
|
+
),
|
|
318
|
+
(["a", "b/", "b/c"], "dest/", False, ["dest/a", "dest/b/", "dest/b/c"]),
|
|
319
|
+
(
|
|
320
|
+
["/a", "/b/", "/b/c"],
|
|
321
|
+
"dest/",
|
|
322
|
+
False,
|
|
323
|
+
["dest/a", "dest/b/", "dest/b/c"],
|
|
324
|
+
),
|
|
325
|
+
),
|
|
326
|
+
)
|
|
327
|
+
def test_other_paths(paths, other, exists, expected):
|
|
328
|
+
assert other_paths(paths, other, exists) == expected
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def test_log():
|
|
332
|
+
import logging
|
|
333
|
+
|
|
334
|
+
logger = setup_logging(logger_name="fsspec.test")
|
|
335
|
+
assert logger.level == logging.DEBUG
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
@pytest.mark.parametrize(
|
|
339
|
+
"par",
|
|
340
|
+
[
|
|
341
|
+
("afile", "file"),
|
|
342
|
+
("file://afile", "file"),
|
|
343
|
+
("noproto://afile", "noproto"),
|
|
344
|
+
("noproto::stuff", "noproto"),
|
|
345
|
+
("simplecache::stuff", "simplecache"),
|
|
346
|
+
("simplecache://stuff", "simplecache"),
|
|
347
|
+
("s3://afile", "s3"),
|
|
348
|
+
(Path("afile"), "file"),
|
|
349
|
+
],
|
|
350
|
+
)
|
|
351
|
+
def test_get_protocol(par):
|
|
352
|
+
url, outcome = par
|
|
353
|
+
assert get_protocol(url) == outcome
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
@pytest.mark.parametrize(
|
|
357
|
+
"par",
|
|
358
|
+
[
|
|
359
|
+
("afile", True),
|
|
360
|
+
("file://afile", True),
|
|
361
|
+
("noproto://afile", False),
|
|
362
|
+
("noproto::stuff", False),
|
|
363
|
+
("simplecache::stuff", True),
|
|
364
|
+
("simplecache://stuff", True),
|
|
365
|
+
(Path("afile"), True),
|
|
366
|
+
],
|
|
367
|
+
)
|
|
368
|
+
def test_can_local(par):
|
|
369
|
+
url, outcome = par
|
|
370
|
+
assert can_be_local(url) == outcome
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def test_mirror_from():
|
|
374
|
+
mock = Mock()
|
|
375
|
+
mock.attr = 1
|
|
376
|
+
|
|
377
|
+
@mirror_from("client", ["attr", "func_1", "func_2"])
|
|
378
|
+
class Real:
|
|
379
|
+
@property
|
|
380
|
+
def client(self):
|
|
381
|
+
return mock
|
|
382
|
+
|
|
383
|
+
def func_2(self):
|
|
384
|
+
assert False, "have to overwrite this"
|
|
385
|
+
|
|
386
|
+
def func_3(self):
|
|
387
|
+
return "should succeed"
|
|
388
|
+
|
|
389
|
+
obj = Real()
|
|
390
|
+
assert obj.attr == mock.attr
|
|
391
|
+
|
|
392
|
+
obj.func_1()
|
|
393
|
+
mock.func_1.assert_called()
|
|
394
|
+
|
|
395
|
+
obj.func_2(1, 2)
|
|
396
|
+
mock.func_2.assert_called_with(1, 2)
|
|
397
|
+
|
|
398
|
+
assert obj.func_3() == "should succeed"
|
|
399
|
+
mock.func_3.assert_not_called()
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
@pytest.mark.parametrize("max_gap", [0, 32])
|
|
403
|
+
@pytest.mark.parametrize("max_block", [None, 128])
|
|
404
|
+
def test_merge_offset_ranges(max_gap, max_block):
|
|
405
|
+
# Input ranges
|
|
406
|
+
# (Using out-of-order ranges for full coverage)
|
|
407
|
+
paths = ["foo", "bar", "bar", "bar", "foo"]
|
|
408
|
+
starts = [0, 0, 512, 64, 32]
|
|
409
|
+
ends = [32, 32, 1024, 256, 64]
|
|
410
|
+
|
|
411
|
+
# Call merge_offset_ranges
|
|
412
|
+
(
|
|
413
|
+
result_paths,
|
|
414
|
+
result_starts,
|
|
415
|
+
result_ends,
|
|
416
|
+
) = merge_offset_ranges(
|
|
417
|
+
paths,
|
|
418
|
+
starts,
|
|
419
|
+
ends,
|
|
420
|
+
max_gap=max_gap,
|
|
421
|
+
max_block=max_block,
|
|
422
|
+
)
|
|
423
|
+
|
|
424
|
+
# Check result
|
|
425
|
+
if max_block is None and max_gap == 32:
|
|
426
|
+
expect_paths = ["bar", "bar", "foo"]
|
|
427
|
+
expect_starts = [0, 512, 0]
|
|
428
|
+
expect_ends = [256, 1024, 64]
|
|
429
|
+
else:
|
|
430
|
+
expect_paths = ["bar", "bar", "bar", "foo"]
|
|
431
|
+
expect_starts = [0, 64, 512, 0]
|
|
432
|
+
expect_ends = [32, 256, 1024, 64]
|
|
433
|
+
|
|
434
|
+
assert expect_paths == result_paths
|
|
435
|
+
assert expect_starts == result_starts
|
|
436
|
+
assert expect_ends == result_ends
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def test_size():
|
|
440
|
+
f = io.BytesIO(b"hello")
|
|
441
|
+
assert fsspec.utils.file_size(f) == 5
|
|
442
|
+
assert f.tell() == 0
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
class _HasFspath:
|
|
446
|
+
def __fspath__(self):
|
|
447
|
+
return "foo"
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
class _HasPathAttr:
|
|
451
|
+
def __init__(self):
|
|
452
|
+
self.path = "foo"
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
@pytest.mark.parametrize(
|
|
456
|
+
"path,expected",
|
|
457
|
+
[
|
|
458
|
+
# coerce to string
|
|
459
|
+
("foo", "foo"),
|
|
460
|
+
(Path("foo"), "foo"),
|
|
461
|
+
(PurePath("foo"), "foo"),
|
|
462
|
+
(_HasFspath(), "foo"),
|
|
463
|
+
(_HasPathAttr(), "foo"),
|
|
464
|
+
# passthrough
|
|
465
|
+
(b"bytes", b"bytes"),
|
|
466
|
+
(None, None),
|
|
467
|
+
(1, 1),
|
|
468
|
+
(True, True),
|
|
469
|
+
(o := object(), o),
|
|
470
|
+
([], []),
|
|
471
|
+
((), ()),
|
|
472
|
+
(set(), set()),
|
|
473
|
+
],
|
|
474
|
+
)
|
|
475
|
+
def test_stringify_path(path, expected):
|
|
476
|
+
path = fsspec.utils.stringify_path(path)
|
|
477
|
+
|
|
478
|
+
assert path == expected
|
fsspec/utils.py
CHANGED
|
@@ -350,8 +350,6 @@ def stringify_path(filepath: str | os.PathLike[str] | pathlib.Path) -> str:
|
|
|
350
350
|
return filepath
|
|
351
351
|
elif hasattr(filepath, "__fspath__"):
|
|
352
352
|
return filepath.__fspath__()
|
|
353
|
-
elif isinstance(filepath, pathlib.Path):
|
|
354
|
-
return str(filepath)
|
|
355
353
|
elif hasattr(filepath, "path"):
|
|
356
354
|
return filepath.path
|
|
357
355
|
else:
|