fsspec 2023.9.2__py3-none-any.whl → 2023.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fsspec/__init__.py +6 -1
- fsspec/_version.py +4 -4
- fsspec/archive.py +1 -1
- fsspec/asyn.py +35 -45
- fsspec/caching.py +161 -90
- fsspec/compression.py +2 -4
- fsspec/core.py +19 -6
- fsspec/fuse.py +2 -2
- fsspec/generic.py +5 -1
- fsspec/gui.py +4 -4
- fsspec/implementations/cached.py +105 -25
- fsspec/implementations/data.py +48 -0
- fsspec/implementations/ftp.py +6 -6
- fsspec/implementations/git.py +3 -3
- fsspec/implementations/github.py +3 -7
- fsspec/implementations/http.py +34 -47
- fsspec/implementations/jupyter.py +5 -5
- fsspec/implementations/libarchive.py +1 -2
- fsspec/implementations/local.py +8 -4
- fsspec/implementations/memory.py +1 -1
- fsspec/implementations/reference.py +67 -25
- fsspec/implementations/sftp.py +11 -11
- fsspec/implementations/smb.py +4 -5
- fsspec/implementations/webhdfs.py +28 -8
- fsspec/implementations/zip.py +2 -2
- fsspec/mapping.py +2 -2
- fsspec/registry.py +8 -6
- fsspec/spec.py +41 -55
- fsspec/tests/abstract/common.py +5 -5
- fsspec/transaction.py +8 -4
- fsspec/utils.py +204 -37
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/METADATA +7 -6
- fsspec-2023.12.0.dist-info/RECORD +54 -0
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/WHEEL +1 -1
- fsspec/implementations/http_sync.py +0 -882
- fsspec-2023.9.2.dist-info/RECORD +0 -54
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/LICENSE +0 -0
- {fsspec-2023.9.2.dist-info → fsspec-2023.12.0.dist-info}/top_level.txt +0 -0
fsspec/utils.py
CHANGED
|
@@ -11,12 +11,32 @@ import tempfile
|
|
|
11
11
|
from functools import partial
|
|
12
12
|
from hashlib import md5
|
|
13
13
|
from importlib.metadata import version
|
|
14
|
+
from typing import (
|
|
15
|
+
IO,
|
|
16
|
+
TYPE_CHECKING,
|
|
17
|
+
Any,
|
|
18
|
+
Callable,
|
|
19
|
+
Iterable,
|
|
20
|
+
Iterator,
|
|
21
|
+
Sequence,
|
|
22
|
+
TypeVar,
|
|
23
|
+
)
|
|
14
24
|
from urllib.parse import urlsplit
|
|
15
25
|
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
from typing_extensions import TypeGuard
|
|
28
|
+
|
|
29
|
+
from fsspec.spec import AbstractFileSystem
|
|
30
|
+
|
|
31
|
+
|
|
16
32
|
DEFAULT_BLOCK_SIZE = 5 * 2**20
|
|
17
33
|
|
|
34
|
+
T = TypeVar("T")
|
|
18
35
|
|
|
19
|
-
|
|
36
|
+
|
|
37
|
+
def infer_storage_options(
|
|
38
|
+
urlpath: str, inherit_storage_options: dict[str, Any] | None = None
|
|
39
|
+
) -> dict[str, Any]:
|
|
20
40
|
"""Infer storage options from URL path and merge it with existing storage
|
|
21
41
|
options.
|
|
22
42
|
|
|
@@ -68,7 +88,7 @@ def infer_storage_options(urlpath, inherit_storage_options=None):
|
|
|
68
88
|
# for HTTP, we don't want to parse, as requests will anyway
|
|
69
89
|
return {"protocol": protocol, "path": urlpath}
|
|
70
90
|
|
|
71
|
-
options = {"protocol": protocol, "path": path}
|
|
91
|
+
options: dict[str, Any] = {"protocol": protocol, "path": path}
|
|
72
92
|
|
|
73
93
|
if parsed_path.netloc:
|
|
74
94
|
# Parse `hostname` from netloc manually because `parsed_path.hostname`
|
|
@@ -98,7 +118,9 @@ def infer_storage_options(urlpath, inherit_storage_options=None):
|
|
|
98
118
|
return options
|
|
99
119
|
|
|
100
120
|
|
|
101
|
-
def update_storage_options(options, inherited=None):
|
|
121
|
+
def update_storage_options(
|
|
122
|
+
options: dict[str, Any], inherited: dict[str, Any] | None = None
|
|
123
|
+
) -> None:
|
|
102
124
|
if not inherited:
|
|
103
125
|
inherited = {}
|
|
104
126
|
collisions = set(options) & set(inherited)
|
|
@@ -106,8 +128,8 @@ def update_storage_options(options, inherited=None):
|
|
|
106
128
|
for collision in collisions:
|
|
107
129
|
if options.get(collision) != inherited.get(collision):
|
|
108
130
|
raise KeyError(
|
|
109
|
-
"Collision between inferred and specified storage "
|
|
110
|
-
"option:\n%s" % collision
|
|
131
|
+
f"Collision between inferred and specified storage "
|
|
132
|
+
f"option:\n{collision}"
|
|
111
133
|
)
|
|
112
134
|
options.update(inherited)
|
|
113
135
|
|
|
@@ -116,7 +138,7 @@ def update_storage_options(options, inherited=None):
|
|
|
116
138
|
compressions: dict[str, str] = {}
|
|
117
139
|
|
|
118
140
|
|
|
119
|
-
def infer_compression(filename):
|
|
141
|
+
def infer_compression(filename: str) -> str | None:
|
|
120
142
|
"""Infer compression, if available, from filename.
|
|
121
143
|
|
|
122
144
|
Infer a named compression type, if registered and available, from filename
|
|
@@ -126,9 +148,10 @@ def infer_compression(filename):
|
|
|
126
148
|
extension = os.path.splitext(filename)[-1].strip(".").lower()
|
|
127
149
|
if extension in compressions:
|
|
128
150
|
return compressions[extension]
|
|
151
|
+
return None
|
|
129
152
|
|
|
130
153
|
|
|
131
|
-
def build_name_function(max_int):
|
|
154
|
+
def build_name_function(max_int: float) -> Callable[[int], str]:
|
|
132
155
|
"""Returns a function that receives a single integer
|
|
133
156
|
and returns it as a string padded by enough zero characters
|
|
134
157
|
to align with maximum possible integer
|
|
@@ -151,13 +174,13 @@ def build_name_function(max_int):
|
|
|
151
174
|
|
|
152
175
|
pad_length = int(math.ceil(math.log10(max_int)))
|
|
153
176
|
|
|
154
|
-
def name_function(i):
|
|
177
|
+
def name_function(i: int) -> str:
|
|
155
178
|
return str(i).zfill(pad_length)
|
|
156
179
|
|
|
157
180
|
return name_function
|
|
158
181
|
|
|
159
182
|
|
|
160
|
-
def seek_delimiter(file, delimiter, blocksize):
|
|
183
|
+
def seek_delimiter(file: IO[bytes], delimiter: bytes, blocksize: int) -> bool:
|
|
161
184
|
r"""Seek current file to file start, file end, or byte after delimiter seq.
|
|
162
185
|
|
|
163
186
|
Seeks file to next chunk delimiter, where chunks are defined on file start,
|
|
@@ -186,7 +209,7 @@ def seek_delimiter(file, delimiter, blocksize):
|
|
|
186
209
|
|
|
187
210
|
# Interface is for binary IO, with delimiter as bytes, but initialize last
|
|
188
211
|
# with result of file.read to preserve compatibility with text IO.
|
|
189
|
-
last = None
|
|
212
|
+
last: bytes | None = None
|
|
190
213
|
while True:
|
|
191
214
|
current = file.read(blocksize)
|
|
192
215
|
if not current:
|
|
@@ -206,7 +229,13 @@ def seek_delimiter(file, delimiter, blocksize):
|
|
|
206
229
|
last = full[-len(delimiter) :]
|
|
207
230
|
|
|
208
231
|
|
|
209
|
-
def read_block(f, offset, length, delimiter=None, split_before=False):
|
|
232
|
+
def read_block(
|
|
233
|
+
f: IO[bytes],
|
|
234
|
+
offset: int,
|
|
235
|
+
length: int | None,
|
|
236
|
+
delimiter: bytes | None = None,
|
|
237
|
+
split_before: bool = False,
|
|
238
|
+
) -> bytes:
|
|
210
239
|
"""Read a block of bytes from a file
|
|
211
240
|
|
|
212
241
|
Parameters
|
|
@@ -267,11 +296,14 @@ def read_block(f, offset, length, delimiter=None, split_before=False):
|
|
|
267
296
|
length = end - start
|
|
268
297
|
|
|
269
298
|
f.seek(offset)
|
|
299
|
+
|
|
300
|
+
# TODO: allow length to be None and read to the end of the file?
|
|
301
|
+
assert length is not None
|
|
270
302
|
b = f.read(length)
|
|
271
303
|
return b
|
|
272
304
|
|
|
273
305
|
|
|
274
|
-
def tokenize(*args, **kwargs):
|
|
306
|
+
def tokenize(*args: Any, **kwargs: Any) -> str:
|
|
275
307
|
"""Deterministic token
|
|
276
308
|
|
|
277
309
|
(modified from dask.base)
|
|
@@ -285,13 +317,14 @@ def tokenize(*args, **kwargs):
|
|
|
285
317
|
if kwargs:
|
|
286
318
|
args += (kwargs,)
|
|
287
319
|
try:
|
|
288
|
-
|
|
320
|
+
h = md5(str(args).encode())
|
|
289
321
|
except ValueError:
|
|
290
322
|
# FIPS systems: https://github.com/fsspec/filesystem_spec/issues/380
|
|
291
|
-
|
|
323
|
+
h = md5(str(args).encode(), usedforsecurity=False)
|
|
324
|
+
return h.hexdigest()
|
|
292
325
|
|
|
293
326
|
|
|
294
|
-
def stringify_path(filepath):
|
|
327
|
+
def stringify_path(filepath: str | os.PathLike[str] | pathlib.Path) -> str:
|
|
295
328
|
"""Attempt to convert a path-like object to a string.
|
|
296
329
|
|
|
297
330
|
Parameters
|
|
@@ -322,16 +355,18 @@ def stringify_path(filepath):
|
|
|
322
355
|
elif hasattr(filepath, "path"):
|
|
323
356
|
return filepath.path
|
|
324
357
|
else:
|
|
325
|
-
return filepath
|
|
358
|
+
return filepath # type: ignore[return-value]
|
|
326
359
|
|
|
327
360
|
|
|
328
|
-
def make_instance(
|
|
361
|
+
def make_instance(
|
|
362
|
+
cls: Callable[..., T], args: Sequence[Any], kwargs: dict[str, Any]
|
|
363
|
+
) -> T:
|
|
329
364
|
inst = cls(*args, **kwargs)
|
|
330
|
-
inst._determine_worker()
|
|
365
|
+
inst._determine_worker() # type: ignore[attr-defined]
|
|
331
366
|
return inst
|
|
332
367
|
|
|
333
368
|
|
|
334
|
-
def common_prefix(paths):
|
|
369
|
+
def common_prefix(paths: Iterable[str]) -> str:
|
|
335
370
|
"""For a list of paths, find the shortest prefix common to all"""
|
|
336
371
|
parts = [p.split("/") for p in paths]
|
|
337
372
|
lmax = min(len(p) for p in parts)
|
|
@@ -344,7 +379,12 @@ def common_prefix(paths):
|
|
|
344
379
|
return "/".join(parts[0][:i])
|
|
345
380
|
|
|
346
381
|
|
|
347
|
-
def other_paths(paths, path2, exists=False, flatten=False):
|
|
382
|
+
def other_paths(
|
|
383
|
+
paths: list[str],
|
|
384
|
+
path2: str | list[str],
|
|
385
|
+
exists: bool = False,
|
|
386
|
+
flatten: bool = False,
|
|
387
|
+
) -> list[str]:
|
|
348
388
|
"""In bulk file operations, construct a new file tree from a list of files
|
|
349
389
|
|
|
350
390
|
Parameters
|
|
@@ -384,25 +424,26 @@ def other_paths(paths, path2, exists=False, flatten=False):
|
|
|
384
424
|
return path2
|
|
385
425
|
|
|
386
426
|
|
|
387
|
-
def is_exception(obj):
|
|
427
|
+
def is_exception(obj: Any) -> bool:
|
|
388
428
|
return isinstance(obj, BaseException)
|
|
389
429
|
|
|
390
430
|
|
|
391
|
-
def isfilelike(f):
|
|
431
|
+
def isfilelike(f: Any) -> TypeGuard[IO[bytes]]:
|
|
392
432
|
for attr in ["read", "close", "tell"]:
|
|
393
433
|
if not hasattr(f, attr):
|
|
394
434
|
return False
|
|
395
435
|
return True
|
|
396
436
|
|
|
397
437
|
|
|
398
|
-
def get_protocol(url):
|
|
438
|
+
def get_protocol(url: str) -> str:
|
|
439
|
+
url = stringify_path(url)
|
|
399
440
|
parts = re.split(r"(\:\:|\://)", url, 1)
|
|
400
441
|
if len(parts) > 1:
|
|
401
442
|
return parts[0]
|
|
402
443
|
return "file"
|
|
403
444
|
|
|
404
445
|
|
|
405
|
-
def can_be_local(path):
|
|
446
|
+
def can_be_local(path: str) -> bool:
|
|
406
447
|
"""Can the given URL be used with open_local?"""
|
|
407
448
|
from fsspec import get_filesystem_class
|
|
408
449
|
|
|
@@ -413,7 +454,7 @@ def can_be_local(path):
|
|
|
413
454
|
return False
|
|
414
455
|
|
|
415
456
|
|
|
416
|
-
def get_package_version_without_import(name):
|
|
457
|
+
def get_package_version_without_import(name: str) -> str | None:
|
|
417
458
|
"""For given package name, try to find the version without importing it
|
|
418
459
|
|
|
419
460
|
Import and package.__version__ is still the backup here, so an import
|
|
@@ -439,7 +480,12 @@ def get_package_version_without_import(name):
|
|
|
439
480
|
return None
|
|
440
481
|
|
|
441
482
|
|
|
442
|
-
def setup_logging(logger=None, logger_name=None, level="DEBUG", clear=True):
|
|
483
|
+
def setup_logging(
|
|
484
|
+
logger: logging.Logger | None = None,
|
|
485
|
+
logger_name: str | None = None,
|
|
486
|
+
level: str = "DEBUG",
|
|
487
|
+
clear: bool = True,
|
|
488
|
+
) -> logging.Logger:
|
|
443
489
|
if logger is None and logger_name is None:
|
|
444
490
|
raise ValueError("Provide either logger object or logger name")
|
|
445
491
|
logger = logger or logging.getLogger(logger_name)
|
|
@@ -455,20 +501,22 @@ def setup_logging(logger=None, logger_name=None, level="DEBUG", clear=True):
|
|
|
455
501
|
return logger
|
|
456
502
|
|
|
457
503
|
|
|
458
|
-
def _unstrip_protocol(name, fs):
|
|
504
|
+
def _unstrip_protocol(name: str, fs: AbstractFileSystem) -> str:
|
|
459
505
|
return fs.unstrip_protocol(name)
|
|
460
506
|
|
|
461
507
|
|
|
462
|
-
def mirror_from(origin_name, methods):
|
|
508
|
+
def mirror_from(
|
|
509
|
+
origin_name: str, methods: Iterable[str]
|
|
510
|
+
) -> Callable[[type[T]], type[T]]:
|
|
463
511
|
"""Mirror attributes and methods from the given
|
|
464
512
|
origin_name attribute of the instance to the
|
|
465
513
|
decorated class"""
|
|
466
514
|
|
|
467
|
-
def origin_getter(method, self):
|
|
515
|
+
def origin_getter(method: str, self: Any) -> Any:
|
|
468
516
|
origin = getattr(self, origin_name)
|
|
469
517
|
return getattr(origin, method)
|
|
470
518
|
|
|
471
|
-
def wrapper(cls):
|
|
519
|
+
def wrapper(cls: type[T]) -> type[T]:
|
|
472
520
|
for method in methods:
|
|
473
521
|
wrapped_method = partial(origin_getter, method)
|
|
474
522
|
setattr(cls, method, property(wrapped_method))
|
|
@@ -478,11 +526,18 @@ def mirror_from(origin_name, methods):
|
|
|
478
526
|
|
|
479
527
|
|
|
480
528
|
@contextlib.contextmanager
|
|
481
|
-
def nullcontext(obj):
|
|
529
|
+
def nullcontext(obj: T) -> Iterator[T]:
|
|
482
530
|
yield obj
|
|
483
531
|
|
|
484
532
|
|
|
485
|
-
def merge_offset_ranges(paths, starts, ends, max_gap=0, max_block=None, sort=True):
|
|
533
|
+
def merge_offset_ranges(
|
|
534
|
+
paths: list[str],
|
|
535
|
+
starts: list[int] | int,
|
|
536
|
+
ends: list[int] | int,
|
|
537
|
+
max_gap: int = 0,
|
|
538
|
+
max_block: int | None = None,
|
|
539
|
+
sort: bool = True,
|
|
540
|
+
) -> tuple[list[str], list[int], list[int]]:
|
|
486
541
|
"""Merge adjacent byte-offset ranges when the inter-range
|
|
487
542
|
gap is <= `max_gap`, and when the merged byte range does not
|
|
488
543
|
exceed `max_block` (if specified). By default, this function
|
|
@@ -496,7 +551,7 @@ def merge_offset_ranges(paths, starts, ends, max_gap=0, max_block=None, sort=Tru
|
|
|
496
551
|
if not isinstance(starts, list):
|
|
497
552
|
starts = [starts] * len(paths)
|
|
498
553
|
if not isinstance(ends, list):
|
|
499
|
-
ends = [
|
|
554
|
+
ends = [ends] * len(paths)
|
|
500
555
|
if len(starts) != len(paths) or len(ends) != len(paths):
|
|
501
556
|
raise ValueError
|
|
502
557
|
|
|
@@ -507,14 +562,14 @@ def merge_offset_ranges(paths, starts, ends, max_gap=0, max_block=None, sort=Tru
|
|
|
507
562
|
starts = [s or 0 for s in starts]
|
|
508
563
|
# Sort by paths and then ranges if `sort=True`
|
|
509
564
|
if sort:
|
|
510
|
-
paths, starts, ends =
|
|
565
|
+
paths, starts, ends = (
|
|
511
566
|
list(v)
|
|
512
567
|
for v in zip(
|
|
513
568
|
*sorted(
|
|
514
569
|
zip(paths, starts, ends),
|
|
515
570
|
)
|
|
516
571
|
)
|
|
517
|
-
|
|
572
|
+
)
|
|
518
573
|
|
|
519
574
|
if paths:
|
|
520
575
|
# Loop through the coupled `paths`, `starts`, and
|
|
@@ -528,7 +583,7 @@ def merge_offset_ranges(paths, starts, ends, max_gap=0, max_block=None, sort=Tru
|
|
|
528
583
|
elif (
|
|
529
584
|
paths[i] != paths[i - 1]
|
|
530
585
|
or ((starts[i] - new_ends[-1]) > max_gap)
|
|
531
|
-
or (
|
|
586
|
+
or (max_block is not None and (ends[i] - new_starts[-1]) > max_block)
|
|
532
587
|
):
|
|
533
588
|
# Cannot merge with previous block.
|
|
534
589
|
# Add new `paths`, `starts`, and `ends` elements
|
|
@@ -545,7 +600,7 @@ def merge_offset_ranges(paths, starts, ends, max_gap=0, max_block=None, sort=Tru
|
|
|
545
600
|
return paths, starts, ends
|
|
546
601
|
|
|
547
602
|
|
|
548
|
-
def file_size(filelike):
|
|
603
|
+
def file_size(filelike: IO[bytes]) -> int:
|
|
549
604
|
"""Find length of any open read-mode file-like"""
|
|
550
605
|
pos = filelike.tell()
|
|
551
606
|
try:
|
|
@@ -573,3 +628,115 @@ def atomic_write(path: str, mode: str = "wb"):
|
|
|
573
628
|
raise
|
|
574
629
|
else:
|
|
575
630
|
os.replace(fn, path)
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
def _translate(pat, STAR, QUESTION_MARK):
|
|
634
|
+
# Copied from: https://github.com/python/cpython/pull/106703.
|
|
635
|
+
res: list[str] = []
|
|
636
|
+
add = res.append
|
|
637
|
+
i, n = 0, len(pat)
|
|
638
|
+
while i < n:
|
|
639
|
+
c = pat[i]
|
|
640
|
+
i = i + 1
|
|
641
|
+
if c == "*":
|
|
642
|
+
# compress consecutive `*` into one
|
|
643
|
+
if (not res) or res[-1] is not STAR:
|
|
644
|
+
add(STAR)
|
|
645
|
+
elif c == "?":
|
|
646
|
+
add(QUESTION_MARK)
|
|
647
|
+
elif c == "[":
|
|
648
|
+
j = i
|
|
649
|
+
if j < n and pat[j] == "!":
|
|
650
|
+
j = j + 1
|
|
651
|
+
if j < n and pat[j] == "]":
|
|
652
|
+
j = j + 1
|
|
653
|
+
while j < n and pat[j] != "]":
|
|
654
|
+
j = j + 1
|
|
655
|
+
if j >= n:
|
|
656
|
+
add("\\[")
|
|
657
|
+
else:
|
|
658
|
+
stuff = pat[i:j]
|
|
659
|
+
if "-" not in stuff:
|
|
660
|
+
stuff = stuff.replace("\\", r"\\")
|
|
661
|
+
else:
|
|
662
|
+
chunks = []
|
|
663
|
+
k = i + 2 if pat[i] == "!" else i + 1
|
|
664
|
+
while True:
|
|
665
|
+
k = pat.find("-", k, j)
|
|
666
|
+
if k < 0:
|
|
667
|
+
break
|
|
668
|
+
chunks.append(pat[i:k])
|
|
669
|
+
i = k + 1
|
|
670
|
+
k = k + 3
|
|
671
|
+
chunk = pat[i:j]
|
|
672
|
+
if chunk:
|
|
673
|
+
chunks.append(chunk)
|
|
674
|
+
else:
|
|
675
|
+
chunks[-1] += "-"
|
|
676
|
+
# Remove empty ranges -- invalid in RE.
|
|
677
|
+
for k in range(len(chunks) - 1, 0, -1):
|
|
678
|
+
if chunks[k - 1][-1] > chunks[k][0]:
|
|
679
|
+
chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:]
|
|
680
|
+
del chunks[k]
|
|
681
|
+
# Escape backslashes and hyphens for set difference (--).
|
|
682
|
+
# Hyphens that create ranges shouldn't be escaped.
|
|
683
|
+
stuff = "-".join(
|
|
684
|
+
s.replace("\\", r"\\").replace("-", r"\-") for s in chunks
|
|
685
|
+
)
|
|
686
|
+
# Escape set operations (&&, ~~ and ||).
|
|
687
|
+
stuff = re.sub(r"([&~|])", r"\\\1", stuff)
|
|
688
|
+
i = j + 1
|
|
689
|
+
if not stuff:
|
|
690
|
+
# Empty range: never match.
|
|
691
|
+
add("(?!)")
|
|
692
|
+
elif stuff == "!":
|
|
693
|
+
# Negated empty range: match any character.
|
|
694
|
+
add(".")
|
|
695
|
+
else:
|
|
696
|
+
if stuff[0] == "!":
|
|
697
|
+
stuff = "^" + stuff[1:]
|
|
698
|
+
elif stuff[0] in ("^", "["):
|
|
699
|
+
stuff = "\\" + stuff
|
|
700
|
+
add(f"[{stuff}]")
|
|
701
|
+
else:
|
|
702
|
+
add(re.escape(c))
|
|
703
|
+
assert i == n
|
|
704
|
+
return res
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
def glob_translate(pat):
|
|
708
|
+
# Copied from: https://github.com/python/cpython/pull/106703.
|
|
709
|
+
# The keyword parameters' values are fixed to:
|
|
710
|
+
# recursive=True, include_hidden=True, seps=None
|
|
711
|
+
"""Translate a pathname with shell wildcards to a regular expression."""
|
|
712
|
+
if os.path.altsep:
|
|
713
|
+
seps = os.path.sep + os.path.altsep
|
|
714
|
+
else:
|
|
715
|
+
seps = os.path.sep
|
|
716
|
+
escaped_seps = "".join(map(re.escape, seps))
|
|
717
|
+
any_sep = f"[{escaped_seps}]" if len(seps) > 1 else escaped_seps
|
|
718
|
+
not_sep = f"[^{escaped_seps}]"
|
|
719
|
+
one_last_segment = f"{not_sep}+"
|
|
720
|
+
one_segment = f"{one_last_segment}{any_sep}"
|
|
721
|
+
any_segments = f"(?:.+{any_sep})?"
|
|
722
|
+
any_last_segments = ".*"
|
|
723
|
+
results = []
|
|
724
|
+
parts = re.split(any_sep, pat)
|
|
725
|
+
last_part_idx = len(parts) - 1
|
|
726
|
+
for idx, part in enumerate(parts):
|
|
727
|
+
if part == "*":
|
|
728
|
+
results.append(one_segment if idx < last_part_idx else one_last_segment)
|
|
729
|
+
continue
|
|
730
|
+
if part == "**":
|
|
731
|
+
results.append(any_segments if idx < last_part_idx else any_last_segments)
|
|
732
|
+
continue
|
|
733
|
+
elif "**" in part:
|
|
734
|
+
raise ValueError(
|
|
735
|
+
"Invalid pattern: '**' can only be an entire path component"
|
|
736
|
+
)
|
|
737
|
+
if part:
|
|
738
|
+
results.extend(_translate(part, f"{not_sep}*", not_sep))
|
|
739
|
+
if idx < last_part_idx:
|
|
740
|
+
results.append(any_sep)
|
|
741
|
+
res = "".join(results)
|
|
742
|
+
return rf"(?s:{res})\Z"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: fsspec
|
|
3
|
-
Version: 2023.9.2
|
|
3
|
+
Version: 2023.12.0
|
|
4
4
|
Summary: File-system specification
|
|
5
5
|
Home-page: https://github.com/fsspec/filesystem_spec
|
|
6
6
|
Maintainer: Martin Durant
|
|
@@ -25,7 +25,7 @@ Requires-Dist: adlfs ; extra == 'abfs'
|
|
|
25
25
|
Provides-Extra: adl
|
|
26
26
|
Requires-Dist: adlfs ; extra == 'adl'
|
|
27
27
|
Provides-Extra: arrow
|
|
28
|
-
Requires-Dist: pyarrow
|
|
28
|
+
Requires-Dist: pyarrow >=1 ; extra == 'arrow'
|
|
29
29
|
Provides-Extra: dask
|
|
30
30
|
Requires-Dist: dask ; extra == 'dask'
|
|
31
31
|
Requires-Dist: distributed ; extra == 'dask'
|
|
@@ -39,7 +39,7 @@ Requires-Dist: dropbox ; extra == 'dropbox'
|
|
|
39
39
|
Provides-Extra: entrypoints
|
|
40
40
|
Provides-Extra: full
|
|
41
41
|
Requires-Dist: adlfs ; extra == 'full'
|
|
42
|
-
Requires-Dist: aiohttp
|
|
42
|
+
Requires-Dist: aiohttp !=4.0.0a0,!=4.0.0a1 ; extra == 'full'
|
|
43
43
|
Requires-Dist: dask ; extra == 'full'
|
|
44
44
|
Requires-Dist: distributed ; extra == 'full'
|
|
45
45
|
Requires-Dist: dropbox ; extra == 'full'
|
|
@@ -50,7 +50,7 @@ Requires-Dist: libarchive-c ; extra == 'full'
|
|
|
50
50
|
Requires-Dist: ocifs ; extra == 'full'
|
|
51
51
|
Requires-Dist: panel ; extra == 'full'
|
|
52
52
|
Requires-Dist: paramiko ; extra == 'full'
|
|
53
|
-
Requires-Dist: pyarrow
|
|
53
|
+
Requires-Dist: pyarrow >=1 ; extra == 'full'
|
|
54
54
|
Requires-Dist: pygit2 ; extra == 'full'
|
|
55
55
|
Requires-Dist: requests ; extra == 'full'
|
|
56
56
|
Requires-Dist: s3fs ; extra == 'full'
|
|
@@ -69,10 +69,10 @@ Requires-Dist: gcsfs ; extra == 'gs'
|
|
|
69
69
|
Provides-Extra: gui
|
|
70
70
|
Requires-Dist: panel ; extra == 'gui'
|
|
71
71
|
Provides-Extra: hdfs
|
|
72
|
-
Requires-Dist: pyarrow
|
|
72
|
+
Requires-Dist: pyarrow >=1 ; extra == 'hdfs'
|
|
73
73
|
Provides-Extra: http
|
|
74
74
|
Requires-Dist: requests ; extra == 'http'
|
|
75
|
-
Requires-Dist: aiohttp
|
|
75
|
+
Requires-Dist: aiohttp !=4.0.0a0,!=4.0.0a1 ; extra == 'http'
|
|
76
76
|
Provides-Extra: libarchive
|
|
77
77
|
Requires-Dist: libarchive-c ; extra == 'libarchive'
|
|
78
78
|
Provides-Extra: oci
|
|
@@ -94,6 +94,7 @@ Requires-Dist: tqdm ; extra == 'tqdm'
|
|
|
94
94
|
[](https://anaconda.org/conda-forge/fsspec)
|
|
95
95
|

|
|
96
96
|
[](https://filesystem-spec.readthedocs.io/en/latest/?badge=latest)
|
|
97
|
+
[](https://pepy.tech/project/fsspec)
|
|
97
98
|
|
|
98
99
|
A specification for pythonic filesystems.
|
|
99
100
|
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
fsspec/__init__.py,sha256=2kT62GfFK-AjgS-LgwSsCo_VA2IePvsyv8Ash5oiaFA,1982
|
|
2
|
+
fsspec/_version.py,sha256=jAwgcB8zkKMbpk-J9sbW492ytNhl4CO0dj1nip8cBPg,501
|
|
3
|
+
fsspec/archive.py,sha256=42f8FEAyP0LIvAhdzS1djyxEyCvlWlnzPo7igWFflYM,2400
|
|
4
|
+
fsspec/asyn.py,sha256=wx6vr5eBJYdW7a2cyv-LkfWu5dCDCcAjcDKjp3ylgR0,36154
|
|
5
|
+
fsspec/caching.py,sha256=N45pzJdD4w5FOX_sxGvHWirggPNB66JTGP1HH6fpSck,28781
|
|
6
|
+
fsspec/callbacks.py,sha256=qmD1v-WWxWmTmcUkEadq-_F_n3OGp9JYarjupUq_j3o,6358
|
|
7
|
+
fsspec/compression.py,sha256=Zrbbb_m2SCF427BMJRYbDKMuSZIIV2YqteoS7AdR8Sc,4867
|
|
8
|
+
fsspec/config.py,sha256=LF4Zmu1vhJW7Je9Q-cwkRc3xP7Rhyy7Xnwj26Z6sv2g,4279
|
|
9
|
+
fsspec/conftest.py,sha256=fVfx-NLrH_OZS1TIpYNoPzM7efEcMoL62reHOdYeFCA,1245
|
|
10
|
+
fsspec/core.py,sha256=9oaIXN4PbbCRo4RZ-r7ZZ6LLm4vMyDwqv-x53kBfOmQ,22376
|
|
11
|
+
fsspec/dircache.py,sha256=YzogWJrhEastHU7vWz-cJiJ7sdtLXFXhEpInGKd4EcM,2717
|
|
12
|
+
fsspec/exceptions.py,sha256=s5eA2wIwzj-aeV0i_KDXsBaIhJJRKzmMGUGwuBHTnS4,348
|
|
13
|
+
fsspec/fuse.py,sha256=66amOa6wdIbS0DMhhfAPUoOB37HPorfXD1izV0prmTY,10145
|
|
14
|
+
fsspec/generic.py,sha256=StTDGXR-r9DJ7YEformhxfoexfewt_JkEUWN0oFRfbA,13373
|
|
15
|
+
fsspec/gui.py,sha256=BEVFplRsQyakNeCWU-vyZBD-16x_flEe0XiDxXparEU,13913
|
|
16
|
+
fsspec/mapping.py,sha256=WFEXRWxujQwfzzkRP5tpdIE0265okAtlP97qFZGvV1k,8165
|
|
17
|
+
fsspec/parquet.py,sha256=i4H3EU3K1Q6jp8sqjFji6a6gKnlOEZufaa7DRNE5X-4,19516
|
|
18
|
+
fsspec/registry.py,sha256=-dl7sh2tsfhMA2uxz5KQDsPFehQTgMJIbVjNq6QLoKU,11145
|
|
19
|
+
fsspec/spec.py,sha256=kfZpvKoh-fftKG6cOkOi2k0PJJwRqV4ZX_NElCBdcB8,66154
|
|
20
|
+
fsspec/transaction.py,sha256=jeexB-H6Aw_gN6Z7hoKKe6v8zizITq39-gyTgpipIKE,2251
|
|
21
|
+
fsspec/utils.py,sha256=_VX_0VwDtoAFSjMYrxvJvnPNX9FMoHO5BlFHXJ0bHFI,23053
|
|
22
|
+
fsspec/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
|
+
fsspec/implementations/arrow.py,sha256=1d-c5KceQJxm8QXML8fFXHvQx0wstG-tNJNsrgMX_CI,8240
|
|
24
|
+
fsspec/implementations/cache_mapper.py,sha256=nE_sY3vw-jJbeBcAP6NGtacP3jHW_7EcG3yUSf0A-4Y,2502
|
|
25
|
+
fsspec/implementations/cache_metadata.py,sha256=ZvyA7Y3KK-5Ct4E5pELzD6mH_5T03XqaKVT96qYDADU,8576
|
|
26
|
+
fsspec/implementations/cached.py,sha256=jCQSAIiO7M8OOmwG4cCYn4LGvMVCbldC9j7GeonwoEc,30238
|
|
27
|
+
fsspec/implementations/dask.py,sha256=CXZbJzIVOhKV8ILcxuy3bTvcacCueAbyQxmvAkbPkrk,4466
|
|
28
|
+
fsspec/implementations/data.py,sha256=Oti0dKzyeadnVIedo3s8CADoh9bNM-96_6viTEYr4lo,1245
|
|
29
|
+
fsspec/implementations/dbfs.py,sha256=0ndCE2OQqrWv6Y8ETufxOQ9ymIIO2JA_Q82bnilqTaw,14660
|
|
30
|
+
fsspec/implementations/dirfs.py,sha256=8EEgKin5JgFBqzHaKig7ipiFAZJvbChUX_vpC_jagoY,11136
|
|
31
|
+
fsspec/implementations/ftp.py,sha256=FzcHeieyda-ai_D8w4YKCzvI4gshuFYlBACBuEIx2Nk,11419
|
|
32
|
+
fsspec/implementations/git.py,sha256=vKGI-Vd5q4H2RrvhebkPc9NwlfkZ980OUGhebeCw-M0,4034
|
|
33
|
+
fsspec/implementations/github.py,sha256=hCisC1vXzZ9kP1UnyGz2Ba8c9cS2JmSGFHtgHG_2Gqw,7190
|
|
34
|
+
fsspec/implementations/http.py,sha256=G3mBiMLDoYddwwzFg5B2zagmDX0r_2jFktrL3BqLU34,29248
|
|
35
|
+
fsspec/implementations/jupyter.py,sha256=B2uj7OEm7yIk-vRSsO37_ND0t0EBvn4B-Su43ibN4Pg,3811
|
|
36
|
+
fsspec/implementations/libarchive.py,sha256=YYZoHefBQItg5lsyJiSy2qPEdUDnfO9IbQWm8dwsGtY,7150
|
|
37
|
+
fsspec/implementations/local.py,sha256=iqXA60ICWEftzJd6LO2nkMH-vy1AoGe8BD4Wwlw94dw,13291
|
|
38
|
+
fsspec/implementations/memory.py,sha256=-a-NR66T-sGj9xTInUsu8KsEiqd156bF8Ui9BuXfmEA,9698
|
|
39
|
+
fsspec/implementations/reference.py,sha256=BHhvx8LIYyBk5OVBWw-PmZsAs_OCaLvF1p8656bwVJE,42438
|
|
40
|
+
fsspec/implementations/sftp.py,sha256=kZRsE8lwMOSyX-wlVBOGVVrc3cxZBcYbsoX0L28w6sQ,5524
|
|
41
|
+
fsspec/implementations/smb.py,sha256=k3RtzW97lJtYuw_QpP1rJRFnUBmSsw9twFjUCex0a5U,10591
|
|
42
|
+
fsspec/implementations/tar.py,sha256=5ZpUp4E3SYbqrwAX2ezvZJqUoZO74Pjb9FpF8o1YBGs,4071
|
|
43
|
+
fsspec/implementations/webhdfs.py,sha256=C5T96C_p66pUf2cQda-7HIZ9fKYwfCkupf2LN_7n7Dw,16145
|
|
44
|
+
fsspec/implementations/zip.py,sha256=SJQihbLqUaMvN8kQiLmTOnc6Mdy3lozQp3MxmrJXqJw,4150
|
|
45
|
+
fsspec/tests/abstract/__init__.py,sha256=i1wcFixV6QhOwdoB24c8oXjzobISNqiKVz9kl2DvAY8,10028
|
|
46
|
+
fsspec/tests/abstract/common.py,sha256=1GQwNo5AONzAnzZj0fWgn8NJPLXALehbsuGxS3FzWVU,4973
|
|
47
|
+
fsspec/tests/abstract/copy.py,sha256=nyCp1Q9apHzti2_UPDh3HzVhRmV7dciD-3dq-wM7JuU,19643
|
|
48
|
+
fsspec/tests/abstract/get.py,sha256=vNR4HztvTR7Cj56AMo7_tx7TeYz1Jgr_2Wb8Lv-UiBY,20755
|
|
49
|
+
fsspec/tests/abstract/put.py,sha256=hEf-yuMWBOT7B6eWcck3tMyJWzdVXtxkY-O6LUt1KAE,20877
|
|
50
|
+
fsspec-2023.12.0.dist-info/LICENSE,sha256=LcNUls5TpzB5FcAIqESq1T53K0mzTN0ARFBnaRQH7JQ,1513
|
|
51
|
+
fsspec-2023.12.0.dist-info/METADATA,sha256=0PDf5Q5_GcLo-r7fJruRt98YZFONP9hX2NbmPfSllrM,6829
|
|
52
|
+
fsspec-2023.12.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
53
|
+
fsspec-2023.12.0.dist-info/top_level.txt,sha256=blt2pDrQDwN3Gklcw13CSPLQRd6aaOgJ8AxqrW395MI,7
|
|
54
|
+
fsspec-2023.12.0.dist-info/RECORD,,
|