remote-upload 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- remote_upload/__init__.py +93 -0
- remote_upload/content.py +36 -0
- remote_upload/exceptions.py +40 -0
- remote_upload/facade.py +37 -0
- remote_upload/metered.py +91 -0
- remote_upload/progress.py +18 -0
- remote_upload/py.typed +0 -0
- remote_upload/request.py +148 -0
- remote_upload/result.py +57 -0
- remote_upload/target.py +30 -0
- remote_upload/targets/__init__.py +3 -0
- remote_upload/targets/azure.py +103 -0
- remote_upload/targets/gcs.py +107 -0
- remote_upload/targets/http.py +104 -0
- remote_upload/targets/httpx_target.py +112 -0
- remote_upload/targets/s3.py +120 -0
- remote_upload/targets/sftp.py +236 -0
- remote_upload-0.1.0.dist-info/METADATA +366 -0
- remote_upload-0.1.0.dist-info/RECORD +21 -0
- remote_upload-0.1.0.dist-info/WHEEL +4 -0
- remote_upload-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""remote-upload: stream local content INTO remote storage through one tiny API.
|
|
2
|
+
|
|
3
|
+
Write-side twin of ``remote-download``: push bytes from your process into S3 /
|
|
4
|
+
MinIO, Azure Blob, GCS, SFTP or an authenticated HTTP endpoint via a single
|
|
5
|
+
framework-agnostic facade.
|
|
6
|
+
|
|
7
|
+
from remote_upload import RemoteUpload
|
|
8
|
+
|
|
9
|
+
result = (
|
|
10
|
+
RemoteUpload.to("https://api.example.com/files/report.pdf")
|
|
11
|
+
.body(data)
|
|
12
|
+
.content_type("application/pdf")
|
|
13
|
+
.upload()
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
The ``HttpTarget`` backend is pure stdlib and always available. The cloud / SSH
|
|
17
|
+
backends need an extra:
|
|
18
|
+
|
|
19
|
+
S3Target -> pip install "remote-upload[s3]" (boto3)
|
|
20
|
+
AzureBlobTarget-> pip install "remote-upload[azure]" (azure-storage-blob)
|
|
21
|
+
GcsTarget -> pip install "remote-upload[gcs]" (google-cloud-storage)
|
|
22
|
+
SftpTarget -> pip install "remote-upload[sftp]" (paramiko)
|
|
23
|
+
HttpxTarget -> pip install "remote-upload[httpx]" (httpx; retries/auth/proxy)
|
|
24
|
+
|
|
25
|
+
They are importable from the package root lazily (``from remote_upload import
|
|
26
|
+
S3Target``) or directly (``from remote_upload.targets.s3 import S3Target``).
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
from typing import TYPE_CHECKING
|
|
32
|
+
|
|
33
|
+
from .content import UploadContent
|
|
34
|
+
from .exceptions import RemoteUploadError, RetryableUploadError, TerminalUploadError
|
|
35
|
+
from .facade import RemoteUpload
|
|
36
|
+
from .progress import ProgressListener
|
|
37
|
+
from .request import RemoteUploadRequest
|
|
38
|
+
from .result import UploadResult
|
|
39
|
+
from .target import UploadTarget
|
|
40
|
+
from .targets.http import HttpTarget
|
|
41
|
+
|
|
42
|
+
__version__ = "0.1.0"
|
|
43
|
+
|
|
44
|
+
if TYPE_CHECKING:
|
|
45
|
+
from .targets.azure import AzureBlobTarget
|
|
46
|
+
from .targets.gcs import GcsTarget
|
|
47
|
+
from .targets.httpx_target import HttpxTarget
|
|
48
|
+
from .targets.s3 import S3Target
|
|
49
|
+
from .targets.sftp import SftpTarget
|
|
50
|
+
|
|
51
|
+
# Optional, extra-gated targets: module path + the extra that ships their dep.
|
|
52
|
+
_LAZY: dict[str, tuple[str, str]] = {
|
|
53
|
+
"S3Target": (".targets.s3", "s3"),
|
|
54
|
+
"AzureBlobTarget": (".targets.azure", "azure"),
|
|
55
|
+
"GcsTarget": (".targets.gcs", "gcs"),
|
|
56
|
+
"SftpTarget": (".targets.sftp", "sftp"),
|
|
57
|
+
"HttpxTarget": (".targets.httpx_target", "httpx"),
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
# Public API of the package root. The order is kept stable on purpose; the
# names are grouped by role for readability only.
__all__ = [
    # Entry point and fluent request
    "RemoteUpload",
    "RemoteUploadRequest",
    # Upload port and the value objects exchanged with it
    "UploadTarget",
    "UploadContent",
    "UploadResult",
    "ProgressListener",
    # Exception hierarchy
    "RemoteUploadError",
    "RetryableUploadError",
    "TerminalUploadError",
    # Target imported eagerly at package import time
    "HttpTarget",
    # Targets listed in ``_LAZY`` and resolved through the module ``__getattr__``
    "S3Target",
    "AzureBlobTarget",
    "GcsTarget",
    "SftpTarget",
    "HttpxTarget",
    # Package metadata
    "__version__",
]
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def __getattr__(name: str) -> object:
|
|
81
|
+
"""Lazily import the extra-gated targets, with a helpful error if missing."""
|
|
82
|
+
if name in _LAZY:
|
|
83
|
+
import importlib
|
|
84
|
+
|
|
85
|
+
module_path, extra = _LAZY[name]
|
|
86
|
+
try:
|
|
87
|
+
module = importlib.import_module(module_path, __name__)
|
|
88
|
+
except ImportError as exc:
|
|
89
|
+
raise ImportError(
|
|
90
|
+
f'{name} requires an optional dependency. Install it with: pip install "remote-upload[{extra}]"'
|
|
91
|
+
) from exc
|
|
92
|
+
return getattr(module, name)
|
|
93
|
+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
remote_upload/content.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""The payload handed to an :class:`~remote_upload.target.UploadTarget`."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Mapping
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import IO
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True, slots=True)
|
|
11
|
+
class UploadContent:
|
|
12
|
+
"""The body stream plus the metadata a destination may persist.
|
|
13
|
+
|
|
14
|
+
Built by :class:`~remote_upload.request.RemoteUploadRequest` and handed to a
|
|
15
|
+
target's ``upload``. The ``body`` is owned by the caller (the request), which
|
|
16
|
+
wraps it for metering / checksum and closes it after the upload -- targets
|
|
17
|
+
read it but must **not** close it.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
#: Live body to stream into the destination. Read by the target, closed by
|
|
21
|
+
#: the caller.
|
|
22
|
+
body: IO[bytes]
|
|
23
|
+
|
|
24
|
+
#: Content length in bytes, or ``None`` when unknown. Some targets (S3 single
|
|
25
|
+
#: PUT) need it; others (chunked HTTP) can stream without it.
|
|
26
|
+
content_length: int | None = None
|
|
27
|
+
|
|
28
|
+
#: MIME type to store with the object, or ``None``.
|
|
29
|
+
content_type: str | None = None
|
|
30
|
+
|
|
31
|
+
#: Suggested filename / object key tail, or ``None``.
|
|
32
|
+
filename: str | None = None
|
|
33
|
+
|
|
34
|
+
#: Arbitrary user metadata to attach to the object. Never ``None`` -- an
|
|
35
|
+
#: empty mapping when none was supplied.
|
|
36
|
+
metadata: Mapping[str, str] = field(default_factory=dict)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Exception hierarchy for remote-upload.
|
|
2
|
+
|
|
3
|
+
Targets translate provider failures into one of two subtypes so callers can act
|
|
4
|
+
on the distinction without parsing messages:
|
|
5
|
+
|
|
6
|
+
* :class:`RetryableUploadError` -- transient (network blip, 5xx, timeout):
|
|
7
|
+
retrying later may succeed.
|
|
8
|
+
* :class:`TerminalUploadError` -- permanent (auth, 4xx, quota, validation):
|
|
9
|
+
retrying the same request will fail again; fix something first.
|
|
10
|
+
|
|
11
|
+
This retryable/terminal split is the deliberate improvement over a single
|
|
12
|
+
exception type: it lets an outbox / sync coordinator decide between "keep
|
|
13
|
+
retrying" and "mark failed, surface to user".
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class RemoteUploadError(Exception):
    """Root of the remote-upload exception hierarchy.

    Signals a failure that happened while streaming a body to a destination.
    Code that raises should favour the more specific
    :class:`RetryableUploadError` or :class:`TerminalUploadError`, which tell
    the caller whether a retry is worthwhile.
    """
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class RetryableUploadError(RemoteUploadError):
    """Upload failure that is expected to be temporary.

    Typical causes are a network blip, a 5xx response or a timeout. A caller
    that has a retry budget (an offline outbox, a sync coordinator) should
    re-enqueue the upload with backoff when it catches this error.
    """
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class TerminalUploadError(RemoteUploadError):
    """Upload failure that will not go away by retrying.

    Typical causes are invalid credentials, a 4xx response, an exhausted quota
    or a validation error. Sending the same request again fails the same way,
    so the caller has to change something first (re-authenticate, fix the
    payload, escalate) rather than retry blindly.
    """
|
remote_upload/facade.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Public, framework-agnostic entry point of the library."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from .request import RemoteUploadRequest
|
|
6
|
+
from .target import UploadTarget
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class RemoteUpload:
    """Facade of the library: the starting point of every fluent upload.

    ::

        # 1. Plain HTTP PUT to a URL
        RemoteUpload.to("https://api.example.com/files/report.pdf") \\
            .body(data).content_type("application/pdf").upload()

        # 2. Cloud storage target (S3 / MinIO, Azure, GCS, ...)
        target = S3Target(bucket="my-bucket", key="tenant/123/photo.jpg",
                          endpoint="http://localhost:9000",
                          access_key=ak, secret_key=sk)
        result = RemoteUpload.to(target).body(data).content_type("image/jpeg").upload()
    """

    @staticmethod
    def to(target: UploadTarget | str) -> RemoteUploadRequest:
        """Open a request for ``target``.

        :param target: either an :class:`UploadTarget` implementation, used as
            is, or a ``str`` holding a URL, which is wrapped in a
            :class:`~remote_upload.targets.http.HttpTarget` built with its
            default settings.
        :returns: a new :class:`RemoteUploadRequest` bound to the target.
        """
        if not isinstance(target, str):
            return RemoteUploadRequest(target)

        # Imported on demand, only for the URL shortcut.
        from .targets.http import HttpTarget

        http_target = HttpTarget(target)
        return RemoteUploadRequest(http_target)
|
remote_upload/metered.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Stream decorator that instruments the body as the target reads it."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import io
|
|
7
|
+
from typing import IO
|
|
8
|
+
|
|
9
|
+
from .progress import ProgressListener
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _new_digest(algorithm: str) -> hashlib._Hash:
|
|
13
|
+
"""Build a hashlib digest, tolerating Java-style names like ``"SHA-256"``.
|
|
14
|
+
|
|
15
|
+
Tries the name verbatim first, then a normalized form (lower-cased, dashes
|
|
16
|
+
removed) so both ``"sha256"`` and ``"SHA-256"`` work.
|
|
17
|
+
|
|
18
|
+
:raises ValueError: if the algorithm is not supported by hashlib.
|
|
19
|
+
"""
|
|
20
|
+
try:
|
|
21
|
+
return hashlib.new(algorithm)
|
|
22
|
+
except ValueError:
|
|
23
|
+
normalized = algorithm.lower().replace("-", "")
|
|
24
|
+
try:
|
|
25
|
+
return hashlib.new(normalized)
|
|
26
|
+
except ValueError as exc:
|
|
27
|
+
raise ValueError(f"Unsupported digest algorithm: {algorithm!r}") from exc
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class MeteredReader(io.RawIOBase):
    """A read-only stream wrapper that instruments the body as it is consumed.

    Every ``readinto`` call pulls bytes from the wrapped stream, adds them to a
    running byte count, optionally feeds them to a digest and optionally fires
    a :data:`~remote_upload.progress.ProgressListener`. The instrumentation is
    driven by whoever reads this stream (the target), not by a copy loop here.

    Closing this reader also closes the wrapped stream (the request opens the
    reader inside a ``with`` block). Targets read but must not close it.
    """

    def __init__(
        self,
        raw: IO[bytes],
        total: int | None = None,
        listener: ProgressListener | None = None,
        digest_algorithm: str | None = None,
    ) -> None:
        """Wrap ``raw``.

        :param raw: binary stream to read from; closed by :meth:`close`.
        :param total: expected size in bytes, passed through to ``listener``;
            ``None`` when unknown.
        :param listener: callback invoked as ``listener(bytes_so_far, total)``.
        :param digest_algorithm: name of the digest to compute over the bytes
            read; ``None`` or empty disables the checksum.
        :raises ValueError: if ``digest_algorithm`` is not supported.
        """
        super().__init__()
        self._raw = raw
        self._total = total
        self._listener = listener
        self._digest = _new_digest(digest_algorithm) if digest_algorithm else None
        self._count = 0

    def readable(self) -> bool:
        """Report that the stream supports reading."""
        return True

    def readinto(self, b: bytearray | memoryview) -> int | None:  # type: ignore[override]
        """Read up to the byte capacity of ``b`` from the wrapped stream into ``b``.

        :returns: number of bytes stored in ``b``; ``0`` at end of stream;
            ``None`` when the wrapped stream returned ``None`` (a non-blocking
            source with no data available yet), so it is not mistaken for EOF.
        """
        # Work on a flat byte view: len() of a memoryview is an item count, so
        # for a buffer with multi-byte items it would under-report the capacity
        # and the slice assignment below would be rejected.
        view = memoryview(b).cast("B")
        chunk = self._raw.read(len(view))
        if chunk is None:
            return None
        if not chunk:
            return 0
        n = len(chunk)
        view[:n] = chunk
        self._count += n
        if self._digest is not None:
            self._digest.update(chunk)
        # Mirror the Java contract: fire progress on bulk reads only, never on
        # single-byte reads (a 1-byte buffer) -- those would be far too chatty.
        if self._listener is not None and len(view) > 1:
            self._listener(self._count, self._total)
        return n

    @property
    def bytes_transferred(self) -> int:
        """Total bytes read through this stream so far."""
        return self._count

    @property
    def checksum_hex(self) -> str | None:
        """Lower-case hex digest, or ``None`` if no algorithm was set."""
        return self._digest.hexdigest() if self._digest is not None else None

    def close(self) -> None:
        """Close the wrapped stream, then this reader; a second call is a no-op."""
        if self.closed:
            return
        try:
            self._raw.close()
        finally:
            # Mark this reader closed even when closing the wrapped stream fails.
            super().close()
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Progress callback type."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
|
|
7
|
+
#: Callback invoked as the body is streamed into the destination.
|
|
8
|
+
#:
|
|
9
|
+
#: Receives ``(bytes_transferred, total_bytes)`` where ``total_bytes`` is
|
|
10
|
+
#: ``None`` when the caller did not supply a content length (chunked uploads,
|
|
11
|
+
#: unknown-length streams)::
|
|
12
|
+
#:
|
|
13
|
+
#: def on_progress(sent: int, total: int | None) -> None:
|
|
14
|
+
#: pct = (sent * 100 // total) if total else -1
|
|
15
|
+
#: print(f"uploaded {sent} / {total} bytes ({pct}%)")
|
|
16
|
+
#:
|
|
17
|
+
#: RemoteUpload.to(target).body(data).on_progress(on_progress).upload()
|
|
18
|
+
ProgressListener = Callable[[int, "int | None"], None]
|
remote_upload/py.typed
ADDED
|
File without changes
|
remote_upload/request.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Fluent request that wraps a target and pushes a body into it."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import io
|
|
6
|
+
import mimetypes
|
|
7
|
+
import os
|
|
8
|
+
import time
|
|
9
|
+
from collections.abc import Mapping
|
|
10
|
+
from dataclasses import replace
|
|
11
|
+
from datetime import timedelta
|
|
12
|
+
from typing import IO, cast
|
|
13
|
+
|
|
14
|
+
from .content import UploadContent
|
|
15
|
+
from .metered import MeteredReader
|
|
16
|
+
from .progress import ProgressListener
|
|
17
|
+
from .result import UploadResult
|
|
18
|
+
from .target import UploadTarget
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class RemoteUploadRequest:
    """Fluent request that wraps an :class:`UploadTarget` and pushes a body into it.

    Supply the body with :meth:`body` / :meth:`body_file`, decorate it with
    content type / filename / metadata / progress / checksum, then call
    :meth:`upload`. Every setter returns the request itself so calls chain.

    :meth:`upload` consumes and **closes** the body stream. Build a new request
    per upload -- instances are not reusable.
    """

    def __init__(self, target: UploadTarget) -> None:
        """Create an empty request bound to ``target``."""
        self._target = target
        self._body: IO[bytes] | None = None
        self._content_length: int | None = None
        self._content_type: str | None = None
        self._filename: str | None = None
        self._metadata: dict[str, str] = {}
        self._progress_listener: ProgressListener | None = None
        self._checksum_algorithm: str | None = None

    def body(
        self,
        data: bytes | bytearray | memoryview | IO[bytes],
        length: int | None = None,
    ) -> RemoteUploadRequest:
        """Set the body from raw bytes or an open binary stream.

        * ``bytes`` / ``bytearray`` / ``memoryview`` -- the data is copied, the
          content length is exact and ``length`` is ignored.
        * a binary stream (anything with ``.read``) -- pass ``length`` when the
          size is known (preferred: cloud targets like S3 need it); omit it for
          chunked / unknown-length uploads.

        :raises TypeError: if ``data`` is neither bytes-like nor a stream.
        """
        if isinstance(data, (bytes, bytearray, memoryview)):
            raw = bytes(data)
            self._body = io.BytesIO(raw)
            self._content_length = len(raw)
        elif hasattr(data, "read"):
            self._body = data
            self._content_length = length
        else:
            raise TypeError(
                f"body() expects bytes or a binary stream; got {type(data).__name__}. For a file path use body_file()."
            )
        return self

    def body_file(self, path: str | os.PathLike[str]) -> RemoteUploadRequest:
        """Set the body from a file on disk.

        Infers the content length, and (unless already set) the filename and
        content type.

        :raises OSError: if the file cannot be opened or its size cannot be
            read; in that case no file handle is left open and the request is
            left unchanged.
        """
        stream = open(path, "rb")  # noqa: SIM115 -- closed by upload()
        try:
            # Size the handle we actually opened rather than stat-ing the path
            # a second time: the two can disagree if the path is replaced in
            # between.
            size = os.fstat(stream.fileno()).st_size
        except BaseException:
            # Nothing else holds this handle yet, so close it before
            # propagating the error.
            stream.close()
            raise
        self._body = stream
        self._content_length = size
        if self._filename is None:
            self._filename = os.path.basename(os.fspath(path))
        if self._content_type is None:
            self._content_type = mimetypes.guess_type(os.fspath(path))[0]
        return self

    def content_type(self, content_type: str | None) -> RemoteUploadRequest:
        """Set the MIME type stored with the object."""
        self._content_type = content_type
        return self

    def filename(self, filename: str | None) -> RemoteUploadRequest:
        """Set the suggested filename / object key tail."""
        self._filename = filename
        return self

    def metadata(self, metadata: Mapping[str, str] | None) -> RemoteUploadRequest:
        """Replace the user metadata with a copy of ``metadata`` (``None`` clears)."""
        self._metadata = dict(metadata) if metadata else {}
        return self

    def add_metadata(self, key: str, value: str) -> RemoteUploadRequest:
        """Add a single metadata entry."""
        self._metadata[key] = value
        return self

    def on_progress(self, listener: ProgressListener | None) -> RemoteUploadRequest:
        """Register a progress callback fired as the destination reads the body."""
        self._progress_listener = listener
        return self

    def checksum(self, algorithm: str | None) -> RemoteUploadRequest:
        """Compute a checksum of the streamed bytes, exposed in the result.

        Common values: ``"md5"``, ``"sha256"`` (Java-style ``"SHA-256"`` also
        works). Pass ``None`` to disable. The name is validated when
        :meth:`upload` runs, not here.
        """
        self._checksum_algorithm = algorithm
        return self

    def upload(self) -> UploadResult:
        """Stream the body into the target and return the enriched result.

        The body is wrapped in a :class:`MeteredReader`, handed to the target
        inside an :class:`UploadContent`, and closed when the target returns or
        raises. The target's result is copied with the metered byte count, the
        elapsed time and the checksum filled in.

        :raises ValueError: if no body was supplied, or if the requested
            checksum algorithm is not supported (the body is closed first).
        """
        body = self._body
        if body is None:
            raise ValueError("body is required; call body(...) before upload()")

        start = time.perf_counter()
        try:
            metered = MeteredReader(
                body,
                self._content_length,
                self._progress_listener,
                self._checksum_algorithm,
            )
        except BaseException:
            # The reader was never created (e.g. unsupported checksum
            # algorithm), so the ``with`` block below cannot close the body for
            # us. Close it here to keep the "upload() closes the body" contract
            # and avoid leaking a handle opened by body_file().
            body.close()
            raise

        with metered:
            content = UploadContent(
                # MeteredReader is a readable binary stream; cast to the IO[bytes]
                # contract (io.RawIOBase is not nominally IO[bytes] for mypy).
                body=cast("IO[bytes]", metered),
                content_length=self._content_length,
                content_type=self._content_type,
                filename=self._filename,
                metadata=dict(self._metadata),
            )
            provider_result = self._target.upload(content)
        elapsed = timedelta(seconds=time.perf_counter() - start)
        return replace(
            provider_result,
            bytes_transferred=metered.bytes_transferred,
            duration=elapsed,
            checksum_algorithm=self._checksum_algorithm,
            checksum_hex=metered.checksum_hex,
        )
|
remote_upload/result.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Outcome of an upload."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from datetime import timedelta
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True, slots=True)
|
|
10
|
+
class UploadResult:
|
|
11
|
+
"""Outcome of a :meth:`~remote_upload.request.RemoteUploadRequest.upload` call.
|
|
12
|
+
|
|
13
|
+
Couples the numeric transfer stats (bytes, duration, optional checksum)
|
|
14
|
+
computed by the library with the provider-specific identifiers (key,
|
|
15
|
+
location, ETag, version id) returned by the target.
|
|
16
|
+
|
|
17
|
+
The target builds it with what it knows; the request enriches a copy (via
|
|
18
|
+
:func:`dataclasses.replace`) with the metered byte count, duration and
|
|
19
|
+
checksum.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
#: Object key / remote path the bytes were written to.
|
|
23
|
+
key: str
|
|
24
|
+
|
|
25
|
+
#: Fully-qualified location (URL / URI) when the provider exposes one.
|
|
26
|
+
location: str | None = None
|
|
27
|
+
|
|
28
|
+
#: Provider ETag (S3 / Azure) when available.
|
|
29
|
+
etag: str | None = None
|
|
30
|
+
|
|
31
|
+
#: Provider version id when versioning is enabled.
|
|
32
|
+
version_id: str | None = None
|
|
33
|
+
|
|
34
|
+
#: Total number of bytes streamed to the destination.
|
|
35
|
+
bytes_transferred: int = 0
|
|
36
|
+
|
|
37
|
+
#: Wall-clock time spent on the upload.
|
|
38
|
+
duration: timedelta | None = None
|
|
39
|
+
|
|
40
|
+
#: Content type stored with the object.
|
|
41
|
+
content_type: str | None = None
|
|
42
|
+
|
|
43
|
+
#: Checksum algorithm computed over the body, or ``None`` if none requested.
|
|
44
|
+
checksum_algorithm: str | None = None
|
|
45
|
+
|
|
46
|
+
#: Lower-case hex checksum digest, or ``None`` if none requested.
|
|
47
|
+
checksum_hex: str | None = None
|
|
48
|
+
|
|
49
|
+
@property
|
|
50
|
+
def bytes_per_second(self) -> int:
|
|
51
|
+
"""Effective throughput in bytes/second; ``0`` if duration is zero/None."""
|
|
52
|
+
if self.duration is None:
|
|
53
|
+
return 0
|
|
54
|
+
millis = self.duration / timedelta(milliseconds=1)
|
|
55
|
+
if millis <= 0:
|
|
56
|
+
return 0
|
|
57
|
+
return int(self.bytes_transferred * 1000 / millis)
|
remote_upload/target.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""The upload port: a destination that can receive a stream of bytes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Protocol, runtime_checkable
|
|
6
|
+
|
|
7
|
+
from .content import UploadContent
|
|
8
|
+
from .result import UploadResult
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@runtime_checkable
class UploadTarget(Protocol):
    """Structural interface of a remote destination that accepts a byte stream.

    Every backend (HTTP, S3, Azure, GCS, SFTP) provides its own implementation,
    so callers push bytes through one API whatever the destination is. A custom
    destination only has to define a single ``upload`` method.

    An implementation reads :attr:`UploadContent.body` but does **not** manage
    its lifecycle: the caller (the request) opens and closes the body stream.

    Provider failures should be translated into
    :class:`~remote_upload.exceptions.RetryableUploadError` (transient) or
    :class:`~remote_upload.exceptions.TerminalUploadError` (permanent), which
    lets callers decide whether a retry makes sense.
    """

    def upload(self, content: UploadContent) -> UploadResult:
        """Write ``content`` to the destination and report the outcome."""
        ...
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""Upload target that writes blobs to Azure Blob Storage."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from azure.core.exceptions import AzureError, HttpResponseError, ServiceRequestError
|
|
9
|
+
from azure.storage.blob import BlobClient, ContentSettings
|
|
10
|
+
|
|
11
|
+
from ..content import UploadContent
|
|
12
|
+
from ..exceptions import RetryableUploadError, TerminalUploadError
|
|
13
|
+
from ..result import UploadResult
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class AzureBlobTarget:
    """:class:`~remote_upload.target.UploadTarget` for Azure Blob Storage.

    Authenticates one of three ways: a full connection string, an endpoint URL
    plus an optional SAS token, or a pre-built :class:`BlobClient` (handy for
    reuse and tests). Uploads are sent with ``overwrite=True``, so an existing
    blob with the same name is replaced.
    """

    #: 4xx statuses that are nevertheless transient: 408 Request Timeout and
    #: 429 Too Many Requests. They are reported as retryable, not terminal.
    _TRANSIENT_CLIENT_STATUSES = frozenset({408, 429})

    def __init__(
        self,
        *,
        container: str,
        blob: str,
        connection_string: str | None = None,
        endpoint: str | None = None,
        sas_token: str | None = None,
        client: BlobClient | None = None,
    ) -> None:
        """Configure the destination blob and how to reach it.

        :param container: container name (required).
        :param blob: blob name inside the container (required).
        :param connection_string: full connection string; takes precedence over
            ``endpoint`` when a client has to be built.
        :param endpoint: account URL, used when no connection string is given.
        :param sas_token: optional credential passed along with ``endpoint``.
        :param client: pre-built client; when given it is used as is and is
            never closed by this target.
        :raises ValueError: if ``container`` or ``blob`` is empty, or if none
            of ``client`` / ``connection_string`` / ``endpoint`` is provided.
        """
        if not container:
            raise ValueError("AzureBlobTarget: container is required")
        if not blob:
            raise ValueError("AzureBlobTarget: blob is required")
        if client is None and not connection_string and not endpoint:
            raise ValueError("AzureBlobTarget: either connection_string, endpoint, or a client must be provided")
        self._container = container
        self._blob = blob
        self._connection_string = connection_string
        self._endpoint = endpoint
        self._sas_token = sas_token
        self._client = client

    def _build_client(self) -> BlobClient:
        """Build a :class:`BlobClient` from the configured connection string / endpoint."""
        if self._connection_string:
            return BlobClient.from_connection_string(
                self._connection_string,
                container_name=self._container,
                blob_name=self._blob,
            )
        # __init__ guarantees endpoint is set when no connection string is given.
        assert self._endpoint is not None
        return BlobClient(
            account_url=self._endpoint,
            container_name=self._container,
            blob_name=self._blob,
            credential=self._sas_token,
        )

    def upload(self, content: UploadContent) -> UploadResult:
        """Stream ``content`` into the blob, overwriting any existing object.

        Length, content type and metadata are forwarded only when present on
        ``content``.

        :returns: an :class:`UploadResult` carrying the blob name, the client
            URL, and the ``etag`` / ``version_id`` entries of the SDK response.
        :raises TerminalUploadError: on an HTTP 4xx response other than
            408 / 429.
        :raises RetryableUploadError: on any other HTTP error status (including
            408 / 429 and a missing status) and on any other ``AzureError``.
        """
        logger.debug("[AzureBlobTarget] PUT %s/%s", self._container, self._blob)

        owns_client = self._client is None
        client = self._client or self._build_client()

        kwargs: dict[str, Any] = {"data": content.body, "overwrite": True}
        if content.content_length is not None:
            kwargs["length"] = content.content_length
        if content.content_type:
            kwargs["content_settings"] = ContentSettings(content_type=content.content_type)
        if content.metadata:
            kwargs["metadata"] = dict(content.metadata)

        try:
            result = client.upload_blob(**kwargs)
            return UploadResult(
                key=self._blob,
                location=client.url,
                etag=result.get("etag"),
                version_id=result.get("version_id"),
                content_type=content.content_type,
            )
        except HttpResponseError as exc:
            status = exc.status_code
            msg = f"Azure upload {self._container}/{self._blob} failed (HTTP {status})"
            is_client_error = status is not None and 400 <= status < 500
            # A timeout (408) or throttling (429) answer is a 4xx, but retrying
            # the same request later can succeed, so it must not be reported as
            # a permanent failure.
            if is_client_error and status not in self._TRANSIENT_CLIENT_STATUSES:
                raise TerminalUploadError(msg) from exc
            raise RetryableUploadError(msg) from exc
        except (ServiceRequestError, AzureError) as exc:
            raise RetryableUploadError(f"Azure upload {self._container}/{self._blob} failed") from exc
        finally:
            # Close only a client we built ourselves; an injected client is the
            # caller's to manage.
            if owns_client:
                client.close()
|