cloudfs 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cloudfs-0.2.0 → cloudfs-0.3.0}/PKG-INFO +1 -1
- {cloudfs-0.2.0 → cloudfs-0.3.0}/cloudfs/backend/azure.py +70 -43
- {cloudfs-0.2.0 → cloudfs-0.3.0}/cloudfs/backend/s3.py +98 -46
- cloudfs-0.3.0/cloudfs/version.py +1 -0
- {cloudfs-0.2.0 → cloudfs-0.3.0}/pyproject.toml +22 -12
- cloudfs-0.2.0/cloudfs/version.py +0 -1
- {cloudfs-0.2.0 → cloudfs-0.3.0}/LICENSE +0 -0
- {cloudfs-0.2.0 → cloudfs-0.3.0}/README.md +0 -0
- {cloudfs-0.2.0 → cloudfs-0.3.0}/cloudfs/__init__.py +0 -0
- {cloudfs-0.2.0 → cloudfs-0.3.0}/cloudfs/backend/__init__.py +0 -0
- {cloudfs-0.2.0 → cloudfs-0.3.0}/cloudfs/backend/gcs.py +0 -0
- {cloudfs-0.2.0 → cloudfs-0.3.0}/cloudfs/base.py +0 -0
- {cloudfs-0.2.0 → cloudfs-0.3.0}/cloudfs/exceptions.py +0 -0
|
@@ -21,8 +21,10 @@ rmdir():
|
|
|
21
21
|
is_dir() returns True.
|
|
22
22
|
|
|
23
23
|
open():
|
|
24
|
-
Read modes
|
|
25
|
-
and
|
|
24
|
+
Read modes stream the blob via a chunked reader; write modes stream the
|
|
25
|
+
upload by staging blocks and committing them on close. Memory use is bounded
|
|
26
|
+
by the upload chunk size, not the blob size. read_bytes/write_bytes still
|
|
27
|
+
load the whole blob, matching pathlib semantics.
|
|
26
28
|
|
|
27
29
|
rename():
|
|
28
30
|
Implemented as copy + delete. Not atomic — a crash between the two steps
|
|
@@ -38,11 +40,16 @@ Performance:
|
|
|
38
40
|
|
|
39
41
|
from __future__ import annotations
|
|
40
42
|
|
|
43
|
+
import base64
|
|
41
44
|
import io
|
|
42
45
|
from typing import IO, Any, Generator, Iterator
|
|
43
46
|
|
|
44
47
|
from cloudfs.base import CloudPath
|
|
45
48
|
|
|
49
|
+
# Streaming upload block size. Bounds the memory held during a streaming
|
|
50
|
+
# open("wb"); a chunk is staged as a block and the block list committed on close.
|
|
51
|
+
_UPLOAD_CHUNK_SIZE = 8 * 1024 * 1024
|
|
52
|
+
|
|
46
53
|
|
|
47
54
|
class AzurePath(CloudPath):
|
|
48
55
|
"""pathlib.Path-compatible interface for Azure Blob Storage.
|
|
@@ -323,25 +330,23 @@ class AzurePath(CloudPath):
|
|
|
323
330
|
newline: str | None = None,
|
|
324
331
|
) -> IO:
|
|
325
332
|
if mode in ("rb", "r"):
|
|
326
|
-
|
|
327
|
-
buf = io.
|
|
333
|
+
downloader = self._container.download_blob(self._key)
|
|
334
|
+
buf = io.BufferedReader(_AzureReadBuffer(downloader))
|
|
328
335
|
if mode == "r":
|
|
329
336
|
return io.TextIOWrapper(
|
|
330
|
-
buf,
|
|
337
|
+
buf, encoding=encoding or "utf-8", errors=errors, newline=newline
|
|
338
|
+
)
|
|
339
|
+
return buf
|
|
340
|
+
if mode in ("wb", "w"):
|
|
341
|
+
raw = _AzureWriteBuffer(self._container, self._key)
|
|
342
|
+
if mode == "w":
|
|
343
|
+
return io.TextIOWrapper(
|
|
344
|
+
io.BufferedWriter(raw),
|
|
331
345
|
encoding=encoding or "utf-8",
|
|
332
346
|
errors=errors,
|
|
333
347
|
newline=newline,
|
|
334
348
|
)
|
|
335
|
-
return
|
|
336
|
-
if mode in ("wb", "w"):
|
|
337
|
-
return _AzureWriteBuffer(
|
|
338
|
-
self._container,
|
|
339
|
-
self._key,
|
|
340
|
-
binary=mode == "wb",
|
|
341
|
-
encoding=encoding or "utf-8",
|
|
342
|
-
errors=errors,
|
|
343
|
-
newline=newline,
|
|
344
|
-
)
|
|
349
|
+
return raw
|
|
345
350
|
raise ValueError(f"Unsupported mode: {mode!r}")
|
|
346
351
|
|
|
347
352
|
def read_bytes(self) -> bytes:
|
|
@@ -355,9 +360,7 @@ class AzurePath(CloudPath):
|
|
|
355
360
|
return len(data)
|
|
356
361
|
|
|
357
362
|
def write_text(self, data: str, encoding: str = "utf-8") -> int:
|
|
358
|
-
|
|
359
|
-
self._container.upload_blob(self._key, encoded, overwrite=True)
|
|
360
|
-
return len(encoded)
|
|
363
|
+
return self.write_bytes(data.encode(encoding))
|
|
361
364
|
|
|
362
365
|
def touch(self, mode: int = 0o666, exist_ok: bool = True) -> None:
|
|
363
366
|
if self._blob_exists():
|
|
@@ -426,36 +429,60 @@ class AzurePath(CloudPath):
|
|
|
426
429
|
return cls(container, key, _client=_client)
|
|
427
430
|
|
|
428
431
|
|
|
432
|
+
class _AzureReadBuffer(io.RawIOBase):
|
|
433
|
+
def __init__(self, downloader):
|
|
434
|
+
self._chunks = downloader.chunks()
|
|
435
|
+
self._leftover = b""
|
|
436
|
+
|
|
437
|
+
def readable(self) -> bool:
|
|
438
|
+
return True
|
|
439
|
+
|
|
440
|
+
def readinto(self, b) -> int:
|
|
441
|
+
if not self._leftover:
|
|
442
|
+
try:
|
|
443
|
+
self._leftover = next(self._chunks)
|
|
444
|
+
except StopIteration:
|
|
445
|
+
return 0
|
|
446
|
+
n = min(len(b), len(self._leftover))
|
|
447
|
+
b[:n] = self._leftover[:n]
|
|
448
|
+
self._leftover = self._leftover[n:]
|
|
449
|
+
return n
|
|
450
|
+
|
|
451
|
+
|
|
429
452
|
class _AzureWriteBuffer(io.RawIOBase):
|
|
430
|
-
def __init__(self, container, key
|
|
431
|
-
self.
|
|
432
|
-
self.
|
|
433
|
-
self.
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
self._text_wrapper = io.TextIOWrapper(
|
|
438
|
-
self._buf, encoding=encoding, errors=errors, newline=newline
|
|
439
|
-
)
|
|
453
|
+
def __init__(self, container, key):
|
|
454
|
+
self._blob = container.get_blob_client(key)
|
|
455
|
+
self._buf = bytearray()
|
|
456
|
+
self._block_ids: list[str] = []
|
|
457
|
+
|
|
458
|
+
def writable(self) -> bool:
|
|
459
|
+
return True
|
|
440
460
|
|
|
441
461
|
def write(self, data) -> int:
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
462
|
+
self._buf.extend(data)
|
|
463
|
+
while len(self._buf) >= _UPLOAD_CHUNK_SIZE:
|
|
464
|
+
self._flush_block(_UPLOAD_CHUNK_SIZE)
|
|
465
|
+
return len(data)
|
|
466
|
+
|
|
467
|
+
def _flush_block(self, size: int) -> None:
|
|
468
|
+
chunk = bytes(self._buf[:size])
|
|
469
|
+
del self._buf[:size]
|
|
470
|
+
block_id = base64.b64encode(f"{len(self._block_ids):032d}".encode()).decode()
|
|
471
|
+
self._blob.stage_block(block_id, chunk)
|
|
472
|
+
self._block_ids.append(block_id)
|
|
445
473
|
|
|
446
474
|
def close(self) -> None:
|
|
447
|
-
if
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
self.close()
|
|
475
|
+
if self.closed:
|
|
476
|
+
return
|
|
477
|
+
from azure.storage.blob import BlobBlock
|
|
478
|
+
|
|
479
|
+
try:
|
|
480
|
+
if self._buf:
|
|
481
|
+
self._flush_block(len(self._buf))
|
|
482
|
+
block_list = [BlobBlock(block_id=bid) for bid in self._block_ids]
|
|
483
|
+
self._blob.commit_block_list(block_list)
|
|
484
|
+
finally:
|
|
485
|
+
super().close()
|
|
459
486
|
|
|
460
487
|
|
|
461
488
|
class AzureStatResult:
|
|
@@ -20,8 +20,10 @@ rmdir():
|
|
|
20
20
|
is_dir() returns True.
|
|
21
21
|
|
|
22
22
|
open():
|
|
23
|
-
|
|
24
|
-
|
|
23
|
+
Read modes stream the object via a chunked reader; write modes stream the
|
|
24
|
+
upload as an S3 multipart upload, flushing one part at a time. Memory use is
|
|
25
|
+
bounded by the upload chunk size, not the object size. read_bytes/write_bytes
|
|
26
|
+
still load the whole object, matching pathlib semantics.
|
|
25
27
|
|
|
26
28
|
Consistency:
|
|
27
29
|
S3 provides strong read-after-write consistency for all operations since
|
|
@@ -39,6 +41,10 @@ from typing import IO, Any, Generator, Iterator
|
|
|
39
41
|
|
|
40
42
|
from cloudfs.base import CloudPath
|
|
41
43
|
|
|
44
|
+
# Streaming upload part size. Must stay >= 5 MiB, S3's minimum non-final
|
|
45
|
+
# multipart part size. Bounds the memory held during a streaming open("wb").
|
|
46
|
+
_UPLOAD_CHUNK_SIZE = 8 * 1024 * 1024
|
|
47
|
+
|
|
42
48
|
|
|
43
49
|
class S3Path(CloudPath):
|
|
44
50
|
"""pathlib.Path-compatible interface for AWS S3."""
|
|
@@ -321,26 +327,25 @@ class S3Path(CloudPath):
|
|
|
321
327
|
newline: str | None = None,
|
|
322
328
|
) -> IO:
|
|
323
329
|
if mode in ("rb", "r"):
|
|
324
|
-
|
|
325
|
-
|
|
330
|
+
body = self._client.get_object(Bucket=self._bucket_name, Key=self._key)[
|
|
331
|
+
"Body"
|
|
332
|
+
]
|
|
333
|
+
buf = io.BufferedReader(_S3ReadBuffer(body))
|
|
326
334
|
if mode == "r":
|
|
327
335
|
return io.TextIOWrapper(
|
|
328
|
-
buf,
|
|
336
|
+
buf, encoding=encoding or "utf-8", errors=errors, newline=newline
|
|
337
|
+
)
|
|
338
|
+
return buf
|
|
339
|
+
if mode in ("wb", "w"):
|
|
340
|
+
raw = _S3WriteBuffer(self._client, self._bucket_name, self._key)
|
|
341
|
+
if mode == "w":
|
|
342
|
+
return io.TextIOWrapper(
|
|
343
|
+
io.BufferedWriter(raw),
|
|
329
344
|
encoding=encoding or "utf-8",
|
|
330
345
|
errors=errors,
|
|
331
346
|
newline=newline,
|
|
332
347
|
)
|
|
333
|
-
return
|
|
334
|
-
if mode in ("wb", "w"):
|
|
335
|
-
return _S3WriteBuffer(
|
|
336
|
-
self._client,
|
|
337
|
-
self._bucket_name,
|
|
338
|
-
self._key,
|
|
339
|
-
binary=mode == "wb",
|
|
340
|
-
encoding=encoding or "utf-8",
|
|
341
|
-
errors=errors,
|
|
342
|
-
newline=newline,
|
|
343
|
-
)
|
|
348
|
+
return raw
|
|
344
349
|
raise ValueError(f"Unsupported mode: {mode!r}")
|
|
345
350
|
|
|
346
351
|
def read_bytes(self) -> bytes:
|
|
@@ -355,9 +360,7 @@ class S3Path(CloudPath):
|
|
|
355
360
|
return len(data)
|
|
356
361
|
|
|
357
362
|
def write_text(self, data: str, encoding: str = "utf-8") -> int:
|
|
358
|
-
|
|
359
|
-
self._client.put_object(Bucket=self._bucket_name, Key=self._key, Body=encoded)
|
|
360
|
-
return len(encoded)
|
|
363
|
+
return self.write_bytes(data.encode(encoding))
|
|
361
364
|
|
|
362
365
|
def touch(self, mode: int = 0o666, exist_ok: bool = True) -> None:
|
|
363
366
|
if self._object_exists():
|
|
@@ -428,42 +431,91 @@ class S3Path(CloudPath):
|
|
|
428
431
|
return cls(bucket, key, _client=_client)
|
|
429
432
|
|
|
430
433
|
|
|
434
|
+
class _S3ReadBuffer(io.RawIOBase):
|
|
435
|
+
def __init__(self, body):
|
|
436
|
+
self._body = body
|
|
437
|
+
|
|
438
|
+
def readable(self) -> bool:
|
|
439
|
+
return True
|
|
440
|
+
|
|
441
|
+
def readinto(self, b) -> int:
|
|
442
|
+
chunk = self._body.read(len(b))
|
|
443
|
+
if not chunk:
|
|
444
|
+
return 0
|
|
445
|
+
n = len(chunk)
|
|
446
|
+
b[:n] = chunk
|
|
447
|
+
return n
|
|
448
|
+
|
|
449
|
+
def close(self) -> None:
|
|
450
|
+
if not self.closed:
|
|
451
|
+
try:
|
|
452
|
+
self._body.close()
|
|
453
|
+
finally:
|
|
454
|
+
super().close()
|
|
455
|
+
|
|
456
|
+
|
|
431
457
|
class _S3WriteBuffer(io.RawIOBase):
|
|
432
|
-
def __init__(self, client, bucket, key
|
|
458
|
+
def __init__(self, client, bucket, key):
|
|
433
459
|
self._client = client
|
|
434
460
|
self._bucket = bucket
|
|
435
461
|
self._key = key
|
|
436
|
-
self.
|
|
437
|
-
self.
|
|
438
|
-
self.
|
|
439
|
-
|
|
440
|
-
self.
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
self._buf, encoding=encoding, errors=errors, newline=newline
|
|
445
|
-
)
|
|
462
|
+
self._buf = bytearray()
|
|
463
|
+
self._parts: list[dict] = []
|
|
464
|
+
self._part_num = 1
|
|
465
|
+
resp = client.create_multipart_upload(Bucket=bucket, Key=key)
|
|
466
|
+
self._upload_id = resp["UploadId"]
|
|
467
|
+
|
|
468
|
+
def writable(self) -> bool:
|
|
469
|
+
return True
|
|
446
470
|
|
|
447
471
|
def write(self, data) -> int:
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
472
|
+
self._buf.extend(data)
|
|
473
|
+
while len(self._buf) >= _UPLOAD_CHUNK_SIZE:
|
|
474
|
+
self._flush_part(_UPLOAD_CHUNK_SIZE)
|
|
475
|
+
return len(data)
|
|
451
476
|
|
|
452
|
-
def
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
self.
|
|
457
|
-
self.
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
477
|
+
def _flush_part(self, size: int) -> None:
|
|
478
|
+
chunk = bytes(self._buf[:size])
|
|
479
|
+
del self._buf[:size]
|
|
480
|
+
resp = self._client.upload_part(
|
|
481
|
+
Bucket=self._bucket,
|
|
482
|
+
Key=self._key,
|
|
483
|
+
PartNumber=self._part_num,
|
|
484
|
+
UploadId=self._upload_id,
|
|
485
|
+
Body=chunk,
|
|
486
|
+
)
|
|
487
|
+
self._parts.append({"PartNumber": self._part_num, "ETag": resp["ETag"]})
|
|
488
|
+
self._part_num += 1
|
|
461
489
|
|
|
462
|
-
def
|
|
463
|
-
|
|
490
|
+
def _safe_abort(self) -> None:
|
|
491
|
+
try:
|
|
492
|
+
self._client.abort_multipart_upload(
|
|
493
|
+
Bucket=self._bucket, Key=self._key, UploadId=self._upload_id
|
|
494
|
+
)
|
|
495
|
+
except Exception:
|
|
496
|
+
pass
|
|
464
497
|
|
|
465
|
-
def
|
|
466
|
-
self.
|
|
498
|
+
def close(self) -> None:
|
|
499
|
+
if self.closed:
|
|
500
|
+
return
|
|
501
|
+
try:
|
|
502
|
+
if self._buf:
|
|
503
|
+
self._flush_part(len(self._buf))
|
|
504
|
+
if self._parts:
|
|
505
|
+
self._client.complete_multipart_upload(
|
|
506
|
+
Bucket=self._bucket,
|
|
507
|
+
Key=self._key,
|
|
508
|
+
UploadId=self._upload_id,
|
|
509
|
+
MultipartUpload={"Parts": self._parts},
|
|
510
|
+
)
|
|
511
|
+
else:
|
|
512
|
+
self._safe_abort()
|
|
513
|
+
self._client.put_object(Bucket=self._bucket, Key=self._key, Body=b"")
|
|
514
|
+
except Exception:
|
|
515
|
+
self._safe_abort()
|
|
516
|
+
raise
|
|
517
|
+
finally:
|
|
518
|
+
super().close()
|
|
467
519
|
|
|
468
520
|
|
|
469
521
|
class S3StatResult:
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
VERSION = "0.3.0"
|
|
@@ -1,22 +1,22 @@
|
|
|
1
1
|
[project]
|
|
2
|
-
name = "cloudfs"
|
|
3
|
-
version = "0.2.0"
|
|
4
|
-
description = "An interface to interact with cloud storage as if it's a local filesystem."
|
|
5
2
|
authors = [{ name = "Allen Chou", email = "f1470891079@gmail.com" }]
|
|
3
|
+
dependencies = []
|
|
4
|
+
description = "An interface to interact with cloud storage as if it's a local filesystem."
|
|
6
5
|
license = "Apache-2.0"
|
|
6
|
+
name = "cloudfs"
|
|
7
7
|
readme = "README.md"
|
|
8
8
|
requires-python = ">=3.11,<4.0"
|
|
9
|
-
|
|
9
|
+
version = "0.3.0"
|
|
10
10
|
|
|
11
11
|
[project.optional-dependencies]
|
|
12
|
-
google = ["google-cloud-storage>=3,<4"]
|
|
13
|
-
s3 = ["boto3>=1.35,<2"]
|
|
14
|
-
azure = ["azure-storage-blob>=12,<13"]
|
|
15
12
|
all = [
|
|
16
|
-
"google-cloud-storage>=3,<4",
|
|
17
|
-
"boto3>=1.35,<2",
|
|
18
13
|
"azure-storage-blob>=12,<13",
|
|
14
|
+
"boto3>=1.35,<2",
|
|
15
|
+
"google-cloud-storage>=3,<4",
|
|
19
16
|
]
|
|
17
|
+
azure = ["azure-storage-blob>=12,<13"]
|
|
18
|
+
google = ["google-cloud-storage>=3,<4"]
|
|
19
|
+
s3 = ["boto3>=1.35,<2"]
|
|
20
20
|
|
|
21
21
|
[dependency-groups]
|
|
22
22
|
dev = [
|
|
@@ -29,19 +29,29 @@ dev = [
|
|
|
29
29
|
"pytest",
|
|
30
30
|
"pytest-xdist",
|
|
31
31
|
"python-dotenv",
|
|
32
|
+
"ruff",
|
|
32
33
|
]
|
|
33
34
|
|
|
34
35
|
[tool.black]
|
|
36
|
+
line-length = 88
|
|
35
37
|
target-version = ["py311"]
|
|
36
38
|
|
|
37
39
|
[tool.flake8]
|
|
38
|
-
max-line-length = 88
|
|
39
40
|
extend-ignore = ["E203"]
|
|
41
|
+
max-line-length = 88
|
|
40
42
|
|
|
41
43
|
[tool.isort]
|
|
42
|
-
profile = "black"
|
|
43
44
|
line_length = 88
|
|
45
|
+
profile = "black"
|
|
46
|
+
|
|
47
|
+
[tool.ruff]
|
|
48
|
+
line-length = 88
|
|
49
|
+
target-version = "py311"
|
|
50
|
+
|
|
51
|
+
[tool.ruff.lint]
|
|
52
|
+
# E203 (whitespace before ':') conflicts with black; mirror the flake8 ignore.
|
|
53
|
+
extend-ignore = ["E203"]
|
|
44
54
|
|
|
45
55
|
[build-system]
|
|
46
|
-
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
|
47
56
|
build-backend = "poetry.core.masonry.api"
|
|
57
|
+
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
cloudfs-0.2.0/cloudfs/version.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
VERSION = "0.2.0"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|