insto 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
insto/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from insto._version import __version__
2
+
3
+ __all__ = ["__version__"]
insto/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ import sys
2
+
3
+ from insto.cli import main
4
+
5
# Entry point for `python -m insto`: run the CLI and use the value returned by
# `main()` as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
insto/_redact.py ADDED
@@ -0,0 +1,88 @@
1
+ """Single source of truth for secret redaction.
2
+
3
+ Used by `insto.cli._format_error` (user-facing error strings) and by the
4
+ logging formatter (file-side log output). Anything that may end up in
5
+ front of human eyes — stderr, log files, copy-pasted bug reports — passes
6
+ through `redact_secrets()` first.
7
+
8
+ Patterns covered:
9
+
10
+ - the literal value of `$HIKERAPI_TOKEN` if it is set in the environment;
11
+ - the literal values of any tokens / proxy credentials registered at
12
+ runtime via `register_secret()` (used by the config loader so a token
13
+ loaded from `~/.insto/config.toml` or supplied by `--proxy user:pass@`
14
+ is redacted the same way `$HIKERAPI_TOKEN` is);
15
+ - query-string `signature=` and `token=` parameters in URLs (HikerAPI
16
+ signs every CDN URL — those signatures are short-lived but still
17
+ sensitive);
18
+ - `Authorization: Bearer <token>` style headers if they ever surface
19
+ in an exception message;
20
+ - `proxy://user:pass@host` userinfo segments.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import os
26
+ import re
27
+ import threading
28
+
29
# Matches a `signature=` / `token=` query parameter (case-insensitive) at the
# start of the text or right after `?` / `&`. Group 1 captures the `name=`
# prefix so a substitution can keep it and mask only the value.
_QS_SECRET_RE = re.compile(
    r"((?:^|[?&])(?:signature|token)=)[^&\s'\"]+",
    re.IGNORECASE,
)
# Matches `Bearer <token>` (case-insensitive). Group 1 captures the scheme word
# plus the whitespace that follows it.
_BEARER_RE = re.compile(
    r"(Bearer\s+)[A-Za-z0-9._~+/=-]+",
    re.IGNORECASE,
)
# Matches `scheme://user:pass@`. Group 1 captures the `scheme://` prefix;
# groups 2 and 3 are the user and password segments.
_PROXY_USERINFO_RE = re.compile(
    r"(\b[a-zA-Z][a-zA-Z0-9+.-]*://)([^:/@\s]+):([^@/\s]+)@",
)

# Mutex taken around every read and write of `_registered_secrets`.
_secrets_lock = threading.Lock()
# Literal secret values added at runtime through `register_secret()`.
_registered_secrets: set[str] = set()
43
+
44
+
45
def register_secret(value: str | None) -> None:
    """Remember `value` so that `redact_secrets()` masks it from now on.

    Registering the same value repeatedly is harmless. `None`, the empty
    string and anything shorter than 4 characters are silently skipped, since
    masking such short strings would hit unrelated text. The registry is
    updated while holding the module mutex, so concurrent readers never
    observe a half-updated set.
    """
    if value and len(value) >= 4:
        with _secrets_lock:
            _registered_secrets.add(value)
57
+
58
+
59
def clear_registered_secrets() -> None:
    """Forget all values added through `register_secret`. Handy for tests."""
    _secrets_lock.acquire()
    try:
        _registered_secrets.clear()
    finally:
        _secrets_lock.release()
63
+
64
+
65
def redact_secrets(text: str) -> str:
    """Return `text` with known secret-shaped substrings replaced with `***`.

    Two passes are applied:

    1. Literal values: the current `$HIKERAPI_TOKEN` (when set and at least
       4 characters long) plus everything added via `register_secret()`.
    2. Shape-based patterns: `scheme://user:pass@` userinfo, `signature=` /
       `token=` query parameters, and `Bearer <token>` headers.

    Falsy input (empty string) is returned unchanged. The registered-secrets
    set is copied under a short mutex, so the function is safe to call from
    concurrent logging handlers.
    """
    if not text:
        return text

    # Snapshot under the lock; the replacements themselves run unlocked.
    with _secrets_lock:
        literals = set(_registered_secrets)
    env_token = os.environ.get("HIKERAPI_TOKEN")
    if env_token and len(env_token) >= 4:
        literals.add(env_token)

    redacted = text
    # Longest first: if one secret is a substring of another, masking the short
    # one first would break the match for the long one and leave its remaining
    # characters readable. The secondary key keeps the order deterministic.
    for secret in sorted(literals, key=lambda item: (-len(item), item)):
        if secret and secret in redacted:
            redacted = redacted.replace(secret, "***")

    redacted = _PROXY_USERINFO_RE.sub(r"\1***:***@", redacted)
    redacted = _QS_SECRET_RE.sub(r"\1***", redacted)
    redacted = _BEARER_RE.sub(r"\1***", redacted)
    return redacted
86
+
87
+
88
+ __all__ = ["clear_registered_secrets", "redact_secrets", "register_secret"]
insto/_version.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
insto/backends/__init__.py ADDED
@@ -0,0 +1,52 @@
1
+ """Backend factory.
2
+
3
+ `make_backend(name, **opts)` is the single entry point used by the service
4
+ facade to construct a backend. Concrete backend modules are imported lazily
5
+ so that pulling in `insto.backends` does not pay the cost (and surface the
6
+ runtime dependency footprint) of every backend at once.
7
+
8
+ Practically: `import insto` does not import `hikerapi`. Only the
9
+ `make_backend("hiker", ...)` call does — and that import lives inside the
10
+ function body.
11
+
12
+ Setting `INSTO_BACKEND=fake` in the environment overrides the requested
13
+ name with `"fake"` — a self-contained, network-free backend used by E2E
14
+ tests. The override is intentionally global so the same CLI / REPL entry
15
+ points the user runs are exercised end-to-end without test-only patches.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import os
21
+ from typing import Any
22
+
23
+ from insto.backends._base import OSINTBackend
24
+
25
+ BACKEND_OVERRIDE_ENV = "INSTO_BACKEND"
26
+
27
+ __all__ = ["BACKEND_OVERRIDE_ENV", "OSINTBackend", "make_backend"]
28
+
29
+
30
+ def make_backend(name: str, **opts: Any) -> OSINTBackend:
31
+ """Construct a backend by short name.
32
+
33
+ Known names:
34
+ "hiker" — `HikerBackend` (HikerAPI SDK). Imports `hikerapi` lazily.
35
+ "fake" — `FakeBackendProd`, hardcoded in-process data for E2E
36
+ tests. Selected when `INSTO_BACKEND=fake` is set even if
37
+ the caller asked for another backend.
38
+
39
+ Raises `ValueError` for unknown backend names.
40
+ """
41
+ override = os.environ.get(BACKEND_OVERRIDE_ENV)
42
+ if override:
43
+ name = override
44
+ if name == "hiker":
45
+ from insto.backends.hiker import HikerBackend
46
+
47
+ return HikerBackend(**opts)
48
+ if name == "fake":
49
+ from insto.backends._fake import FakeBackendProd
50
+
51
+ return FakeBackendProd(**opts)
52
+ raise ValueError(f"unknown backend: {name!r}")
insto/backends/_base.py ADDED
@@ -0,0 +1,135 @@
1
+ """Abstract OSINT backend interface.
2
+
3
+ `OSINTBackend` is the contract every backend (HikerAPI v0.1, aiograpi v0.2,
4
+ future TikTok / Bluesky / Threads providers) must implement. The command and
5
+ service layers depend on this ABC, never on a concrete backend — that is what
6
+ keeps v0.2 a pure addition.
7
+
8
+ All collection-returning methods are async generators (`AsyncIterator[T]`)
9
+ with an optional `limit: int | None` parameter. Cursors / page tokens are an
10
+ internal implementation detail of each backend and never leak above this
11
+ layer.
12
+
13
+ The methods raise exceptions from `insto.exceptions` exclusively; raw HTTP /
14
+ SDK errors must be mapped to the taxonomy by the backend itself.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from abc import ABC, abstractmethod
20
+ from collections.abc import AsyncIterator
21
+ from typing import Any
22
+
23
+ from insto.models import (
24
+ Comment,
25
+ Highlight,
26
+ HighlightItem,
27
+ Post,
28
+ Profile,
29
+ Quota,
30
+ Story,
31
+ User,
32
+ )
33
+
34
+
35
class OSINTBackend(ABC):
    """Contract for an asynchronous OSINT data source covering one platform.

    A backend must tolerate concurrent use from within one asyncio event loop
    (the REPL runs a single loop and may schedule watch tasks in parallel).
    Safety across processes is NOT part of the contract.
    """

    # Capability tokens advertised by the backend. A command states its needs
    # with `@command(..., requires=("followed",))` and the dispatcher refuses
    # the call if the active backend lacks one of them. The default is empty
    # because HikerAPI only offers public OSINT; an `aiograpi` backend would
    # widen it with `{"followed", ...}`.
    capabilities: frozenset[str] = frozenset()

    @abstractmethod
    async def resolve_target(self, username: str) -> str:
        """Map `username` to its stable `pk`; raise `ProfileNotFound` if unknown."""

    @abstractmethod
    async def get_profile(self, pk: str) -> Profile:
        """Return the complete profile DTO belonging to `pk`."""

    @abstractmethod
    async def get_user_about(self, pk: str) -> dict[str, Any]:
        """Return the `user_about` payload (verification, dates, links)."""

    @abstractmethod
    def iter_user_posts(
        self, pk: str, *, limit: int | None = None
    ) -> AsyncIterator[Post]:
        """Yield the user's feed posts, newest first."""

    @abstractmethod
    def iter_user_followers(
        self, pk: str, *, limit: int | None = None
    ) -> AsyncIterator[User]:
        """Yield accounts that follow the user."""

    @abstractmethod
    def iter_user_following(
        self, pk: str, *, limit: int | None = None
    ) -> AsyncIterator[User]:
        """Yield accounts the user follows."""

    @abstractmethod
    def iter_user_tagged(
        self, pk: str, *, limit: int | None = None
    ) -> AsyncIterator[Post]:
        """Yield posts in which the user is tagged."""

    @abstractmethod
    def iter_user_highlights(
        self, pk: str, *, limit: int | None = None
    ) -> AsyncIterator[Highlight]:
        """Yield the highlight reels owned by the user."""

    @abstractmethod
    def iter_highlight_items(
        self, highlight_id: str, *, limit: int | None = None
    ) -> AsyncIterator[HighlightItem]:
        """Yield the items contained in one highlight reel."""

    @abstractmethod
    def iter_post_comments(
        self, media_pk: str, *, limit: int | None = None
    ) -> AsyncIterator[Comment]:
        """Yield the comments left on a post."""

    @abstractmethod
    def iter_post_likers(
        self, media_pk: str, *, limit: int | None = None
    ) -> AsyncIterator[User]:
        """Yield the users who liked a post."""

    @abstractmethod
    def iter_user_stories(
        self, pk: str, *, limit: int | None = None
    ) -> AsyncIterator[Story]:
        """Yield the user's currently-active stories."""

    @abstractmethod
    async def get_suggested(self, pk: str) -> list[User]:
        """Return accounts suggested as similar to `pk`."""

    @abstractmethod
    def iter_hashtag_posts(
        self, tag: str, *, limit: int | None = None
    ) -> AsyncIterator[Post]:
        """Yield top / recent posts published under a hashtag."""

    @abstractmethod
    def get_quota(self) -> Quota:
        """Return the most recently known quota state of the backend."""

    @abstractmethod
    def get_last_error(self) -> BaseException | None:
        """Return the most recent exception this backend raised, or `None`."""

    def get_schema_drift_count(self) -> int:
        """Return how many `SchemaDrift` errors were seen during this session.

        The base implementation reports 0 so that simple backends (in-process
        fakes) do not have to keep a counter. Real backends override it with a
        running total, which `/health` surfaces so an operator can notice
        provider degradation.
        """
        return 0

    async def aclose(self) -> None:  # noqa: B027 — intentional empty default
        """Release resources owned by the backend (HTTP clients, sockets, …).

        The base implementation does nothing, so in-memory backends (test
        fakes, future mock backends) can skip overriding it. Backends holding
        network clients (HikerBackend) override it to close them.
        """
insto/backends/_cdn.py ADDED
@@ -0,0 +1,343 @@
1
+ """CDN streamer with defense in depth.
2
+
3
+ Single helper used by the service facade to download CDN-hosted media
4
+ (profile pictures, posts, stories, highlights) into a target directory.
5
+ All defenses live here so that command and service code never touch raw
6
+ HTTP for media.
7
+
8
+ Defenses (each one is exercised by tests/test_cdn.py):
9
+
10
+ - HTTPS-only (http:// rejected, http:// redirects rejected)
11
+ - Host allowlist (only `*.cdninstagram.com` and `*.fbcdn.net` accepted, both
12
+ for the initial URL and any redirect target)
13
+ - Filename built from the caller-supplied ``dest`` (intended to be the post
14
+ pk or similar stable id) plus an extension chosen from response
15
+ Content-Type cross-checked against magic-byte sniffing — the CDN-supplied
16
+ filename in the URL is ignored entirely
17
+ - Whitelist of extensions (.jpg/.jpeg/.png/.webp/.mp4/.mov)
18
+ - Per-resource byte budget (default 500 MB)
19
+ - Pre-flight free-disk check (default 1 GB minimum)
20
+ - Atomic write: stream to ``<final>.part``, fsync, rename → final
21
+ - Collision suffix: never overwrites an existing file; appends ``_<n>``
22
+ - ``mtime`` is set from ``taken_at`` if supplied (so file dates match post
23
+ capture time, not download time)
24
+ - macOS xattr tagging via ctypes ``setxattr(2)`` — adds
25
+ ``com.apple.metadata:kMDItemUserTags = insto`` so downloaded media is
26
+ visible in Finder Smart Folders. No-op on non-darwin. On darwin, an
27
+ ``OSError`` (e.g. NFS, exFAT) is reported once-per-process to stderr
28
+ and otherwise silently swallowed.
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ import contextlib
34
+ import ctypes
35
+ import ctypes.util
36
+ import os
37
+ import shutil
38
+ import sys
39
+ from datetime import datetime
40
+ from pathlib import Path
41
+ from urllib.parse import urljoin, urlparse
42
+
43
+ import httpx
44
+
45
+ from insto.exceptions import BackendError
46
+
47
# Domains media may be fetched from. `_is_host_allowed` accepts a host that
# equals one of these or ends with "." + one of these.
ALLOWED_HOST_SUFFIXES: tuple[str, ...] = ("cdninstagram.com", "fbcdn.net")
# File extensions a download is allowed to end up with.
ALLOWED_EXTENSIONS: frozenset[str] = frozenset({".jpg", ".jpeg", ".png", ".webp", ".mp4", ".mov"})

# Normalized Content-Type -> extension appended to the saved file.
CT_TO_EXT: dict[str, str] = {
    "image/jpeg": ".jpg",
    "image/jpg": ".jpg",
    "image/png": ".png",
    "image/webp": ".webp",
    "video/mp4": ".mp4",
    "video/quicktime": ".mov",
}

# Default per-resource download cap: 500 * 1024 * 1024 bytes.
DEFAULT_BYTE_BUDGET: int = 500 * 1024 * 1024
# Default free space required before a download starts: 1024 ** 3 bytes.
DEFAULT_MIN_FREE_DISK: int = 1024 * 1024 * 1024
# Number of leading bytes buffered for magic-byte sniffing.
SNIFF_SIZE: int = 512
# Chunk size requested from the response byte iterator.
CHUNK_SIZE: int = 64 * 1024
# Redirects followed before the download is abandoned.
MAX_REDIRECTS: int = 5
# Timeout handed to the `httpx.AsyncClient` that `stream_to_file` creates.
DEFAULT_TIMEOUT: float = 30.0

# Extended-attribute name and value written by `_set_macos_tag`.
_XATTR_NAME: bytes = b"com.apple.metadata:kMDItemUserTags"
_XATTR_VALUE: bytes = b"insto"
# Message printed to stderr the first time `setxattr` fails.
_XATTR_WARN_LINE: str = "note: filesystem does not support xattr; tagging skipped"

# Flipped to True after the warning above has been printed once.
_xattr_warned: bool = False
71
+
72
+
73
def _is_host_allowed(host: str) -> bool:
    """Tell whether `host` is an allowlisted domain or a subdomain of one.

    The comparison is case-insensitive. A host passes when it equals an entry
    of `ALLOWED_HOST_SUFFIXES` or ends with a dot followed by that entry, so
    `evilcdninstagram.com` does not pass.
    """
    candidate = host.lower()
    for suffix in ALLOWED_HOST_SUFFIXES:
        if candidate == suffix or candidate.endswith("." + suffix):
            return True
    return False
76
+
77
+
78
+ def _normalize_ct(ct: str | None) -> str | None:
79
+ if not ct:
80
+ return None
81
+ return ct.split(";", 1)[0].strip().lower() or None
82
+
83
+
84
+ def _sniff(prefix: bytes) -> tuple[str, str] | None:
85
+ """Sniff magic bytes; return (extension, mime) or None."""
86
+ if prefix.startswith(b"\xff\xd8\xff"):
87
+ return ".jpg", "image/jpeg"
88
+ if prefix.startswith(b"\x89PNG\r\n\x1a\n"):
89
+ return ".png", "image/png"
90
+ if prefix.startswith(b"RIFF") and len(prefix) >= 12 and prefix[8:12] == b"WEBP":
91
+ return ".webp", "image/webp"
92
+ if len(prefix) >= 12 and prefix[4:8] == b"ftyp":
93
+ brand = prefix[8:12]
94
+ if brand == b"qt ":
95
+ return ".mov", "video/quicktime"
96
+ return ".mp4", "video/mp4"
97
+ return None
98
+
99
+
100
+ def _ct_compatible(declared: str, sniffed: str) -> bool:
101
+ if declared == sniffed:
102
+ return True
103
+ return {declared, sniffed} <= {"image/jpeg", "image/jpg"}
104
+
105
+
106
+ def _resolve_collision(base: Path) -> Path:
107
+ """Return a non-existing path; if ``base`` exists, append ``_1``, ``_2`` …"""
108
+ if not base.exists():
109
+ return base
110
+ stem, suffix, parent = base.stem, base.suffix, base.parent
111
+ n = 1
112
+ while True:
113
+ candidate = parent / f"{stem}_{n}{suffix}"
114
+ if not candidate.exists():
115
+ return candidate
116
+ n += 1
117
+
118
+
119
def _set_macos_tag(path: Path) -> None:
    """Write the ``com.apple.metadata:kMDItemUserTags=insto`` xattr on darwin.

    Does nothing on any other platform, or when libc cannot be located. The
    attribute is written through libc's ``setxattr(2)`` via ctypes. A non-zero
    return code (e.g. a filesystem without xattr support such as NFS or exFAT)
    never raises; the first such failure in a process prints one note to
    stderr and later failures stay silent.

    NOTE(review): Finder is usually described as storing tags as a binary
    plist under ``com.apple.metadata:_kMDItemUserTags``; confirm that the raw
    value written here actually shows up as a tag.
    """
    global _xattr_warned
    if sys.platform != "darwin":
        return

    libc_name = ctypes.util.find_library("c")
    if libc_name is None:  # pragma: no cover - libc is always present on darwin
        return

    setxattr = ctypes.CDLL(libc_name, use_errno=True).setxattr
    setxattr.restype = ctypes.c_int
    # (path, name, value, size, position, options)
    setxattr.argtypes = [
        ctypes.c_char_p,
        ctypes.c_char_p,
        ctypes.c_void_p,
        ctypes.c_size_t,
        ctypes.c_uint32,
        ctypes.c_int,
    ]

    encoded_path = str(path).encode("utf-8")
    status = setxattr(encoded_path, _XATTR_NAME, _XATTR_VALUE, len(_XATTR_VALUE), 0, 0)
    if status == 0 or _xattr_warned:
        return
    _xattr_warned = True
    print(_XATTR_WARN_LINE, file=sys.stderr)
153
+
154
+
155
def _validate_url(url: str) -> None:
    """Reject a CDN URL that is not HTTPS or whose host is not allowlisted.

    Raises `BackendError` in either case; returns `None` when the URL passes.
    """
    parts = urlparse(url)
    if parts.scheme != "https":
        raise BackendError(f"non-https CDN url rejected: {url}")
    host = parts.hostname
    if not (host and _is_host_allowed(host)):
        raise BackendError(f"CDN host not in allowlist: {host}")
161
+
162
+
163
def _validate_redirect(current: str, location: str) -> str:
    """Resolve a redirect target and make sure it is still an allowed CDN URL.

    `location` is joined against `current`, so relative redirects work. The
    resulting absolute URL is returned when it is HTTPS and its host is
    allowlisted; otherwise `BackendError` is raised.
    """
    target = urljoin(current, location)
    parts = urlparse(target)
    if parts.scheme != "https":
        raise BackendError(f"CDN redirect to non-https rejected: {target}")
    host = parts.hostname
    if not (host and _is_host_allowed(host)):
        raise BackendError(f"CDN cross-host redirect rejected: {host}")
    return target
171
+
172
+
173
def _decide_extension(declared_ct: str | None, sniffed: tuple[str, str]) -> str:
    """Choose the file extension from the declared and the sniffed media type.

    Without a declared type the sniffed extension is used. With one, it must
    be compatible with the sniffed type (else `BackendError`); the extension
    then comes from `CT_TO_EXT`, falling back to the sniffed extension for
    types missing from that table. The result must be in
    `ALLOWED_EXTENSIONS`, otherwise `BackendError` is raised.
    """
    sniff_ext, sniff_mime = sniffed

    if declared_ct is None:
        chosen = sniff_ext
    elif _ct_compatible(declared_ct, sniff_mime):
        chosen = CT_TO_EXT.get(declared_ct, sniff_ext)
    else:
        raise BackendError(
            f"CDN content-type mismatch: header={declared_ct} sniff={sniff_mime}"
        )

    if chosen in ALLOWED_EXTENSIONS:
        return chosen
    raise BackendError(f"CDN extension not in allowlist: {chosen}")
186
+
187
+
188
+ def _coerce_taken_at(taken_at: datetime | float | int | None) -> float | None:
189
+ if taken_at is None:
190
+ return None
191
+ if isinstance(taken_at, datetime):
192
+ return taken_at.timestamp()
193
+ return float(taken_at)
194
+
195
+
196
async def stream_to_file(
    url: str,
    dest: Path,
    *,
    content_type_hint: str | None = None,
    byte_budget: int = DEFAULT_BYTE_BUDGET,
    taken_at: datetime | float | int | None = None,
    client: httpx.AsyncClient | None = None,
    min_free_disk: int = DEFAULT_MIN_FREE_DISK,
    timeout: float = DEFAULT_TIMEOUT,
) -> Path:
    """Download a CDN URL next to ``dest`` (a path *without* extension).

    Steps, in order: validate ``url`` (HTTPS + host allowlist), create the
    parent directory of ``dest``, refuse to start when fewer than
    ``min_free_disk`` bytes are free there, then stream the response.

    The extension is derived from the response Content-Type and cross-checked
    against magic-byte sniffing of the leading bytes, then appended to
    ``dest``. Should that path already exist, ``_1`` / ``_2`` … is inserted
    before the extension.

    When no ``client`` is passed, a temporary one is created with redirects
    disabled and the given ``timeout``, and it is closed before returning. A
    caller-supplied client is left open.

    Returns the path actually written. Raises `BackendError` on any rejected
    URL, insufficient disk space, or download failure.
    """
    _validate_url(url)

    target_dir = dest.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    available = shutil.disk_usage(target_dir).free
    if available < min_free_disk:
        raise BackendError(f"insufficient disk space: {available} bytes free < {min_free_disk}")

    created_here = client is None
    session = (
        httpx.AsyncClient(follow_redirects=False, timeout=timeout) if created_here else client
    )

    try:
        mtime = _coerce_taken_at(taken_at)
        return await _download_with_redirects(
            url=url,
            dest=dest,
            content_type_hint=content_type_hint,
            byte_budget=byte_budget,
            taken_at=mtime,
            client=session,
        )
    finally:
        # Only close what this call opened.
        if created_here:
            await session.aclose()
242
+
243
+
244
async def _download_with_redirects(
    *,
    url: str,
    dest: Path,
    content_type_hint: str | None,
    byte_budget: int,
    taken_at: float | None,
    client: httpx.AsyncClient,
) -> Path:
    """GET ``url``, following validated redirects by hand, and save the body.

    Every 3xx response must carry a ``Location`` header whose resolved target
    passes `_validate_redirect`; at most `MAX_REDIRECTS` hops are followed.
    Any final status other than 200 is an error. The media type handed on to
    `_stream_response` is the response Content-Type, or ``content_type_hint``
    when the header is missing or empty.

    Returns the path written by `_stream_response`; raises `BackendError` on
    every failure described above.
    """
    target = url
    hops = 0
    while True:
        async with client.stream("GET", target) as resp:
            status = resp.status_code

            if not 300 <= status < 400:
                if status != 200:
                    raise BackendError(f"CDN GET failed: HTTP {status}")
                header_ct = _normalize_ct(resp.headers.get("content-type"))
                declared_ct = header_ct or _normalize_ct(content_type_hint)
                return await _stream_response(
                    resp=resp,
                    dest=dest,
                    declared_ct=declared_ct,
                    byte_budget=byte_budget,
                    taken_at=taken_at,
                )

            # Redirect: validate the next hop, then loop with the new URL.
            if hops >= MAX_REDIRECTS:
                raise BackendError("too many CDN redirects")
            hops += 1
            location = resp.headers.get("location")
            if not location:
                raise BackendError("CDN redirect without Location header")
            target = _validate_redirect(target, location)
280
+
281
+
282
async def _stream_response(
    *,
    resp: httpx.Response,
    dest: Path,
    declared_ct: str | None,
    byte_budget: int,
    taken_at: float | None,
) -> Path:
    """Write the response body to disk atomically and return the final path.

    The body is streamed into ``<dest.name>.part`` beside ``dest`` (a stale
    ``.part`` file is removed first). While streaming, up to `SNIFF_SIZE`
    leading bytes are buffered for magic-byte sniffing and the running total
    is checked against ``byte_budget``. After the temp file has been flushed
    and fsynced it is renamed to ``dest`` + chosen extension (collision-safe
    via `_resolve_collision`), its mtime is set from ``taken_at`` when given,
    and the macOS tag is applied.

    Raises `BackendError` for an empty body, unknown magic bytes, a declared /
    sniffed type mismatch, a disallowed extension, or an exceeded byte budget.
    On any exception the temp file is removed before re-raising.
    """
    folder = dest.parent
    partial = folder / (dest.name + ".part")
    if partial.exists():
        partial.unlink()

    head = bytearray()
    detected: tuple[str, str] | None = None
    written = 0

    try:
        with open(partial, "wb") as out:
            async for chunk in resp.aiter_bytes(CHUNK_SIZE):
                if not chunk:
                    continue

                if detected is None and len(head) < SNIFF_SIZE:
                    missing = SNIFF_SIZE - len(head)
                    head.extend(chunk[:missing])
                    # Sniff as soon as the buffer is full so a bad payload is
                    # rejected early instead of after the whole download.
                    if len(head) >= SNIFF_SIZE:
                        detected = _sniff(bytes(head))
                        if detected is None:
                            raise BackendError("CDN content-type sniff failed: unknown magic bytes")
                        if declared_ct is not None and not _ct_compatible(
                            declared_ct, detected[1]
                        ):
                            raise BackendError(
                                "CDN content-type mismatch: "
                                f"header={declared_ct} sniff={detected[1]}"
                            )

                size = len(chunk)
                if written + size > byte_budget:
                    raise BackendError(f"CDN response exceeded byte budget {byte_budget}")
                out.write(chunk)
                written += size

            # Bodies shorter than SNIFF_SIZE never filled the buffer above.
            if detected is None:
                if not head:
                    raise BackendError("CDN response was empty")
                detected = _sniff(bytes(head))
                if detected is None:
                    raise BackendError("CDN content-type sniff failed: unknown magic bytes")

            out.flush()
            os.fsync(out.fileno())

        extension = _decide_extension(declared_ct, detected)
        final = _resolve_collision(folder / (dest.name + extension))
        os.rename(partial, final)

        if taken_at is not None:
            os.utime(final, (taken_at, taken_at))

        _set_macos_tag(final)
        return final
    except BaseException:
        # Covers cancellation too: never leave a `.part` file behind.
        if partial.exists():
            with contextlib.suppress(OSError):
                partial.unlink()
        raise