fetchurl-sdk 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fetchurl_sdk-0.1.4/.gitignore +2 -0
- fetchurl_sdk-0.1.4/.python-version +1 -0
- fetchurl_sdk-0.1.4/PKG-INFO +22 -0
- fetchurl_sdk-0.1.4/README.md +3 -0
- fetchurl_sdk-0.1.4/fetchurl/__init__.py +417 -0
- fetchurl_sdk-0.1.4/mise.toml +3 -0
- fetchurl_sdk-0.1.4/pyproject.toml +30 -0
- fetchurl_sdk-0.1.4/test_fetchurl.py +245 -0
- fetchurl_sdk-0.1.4/uv.lock +8 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.13
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fetchurl-sdk
|
|
3
|
+
Version: 0.1.4
|
|
4
|
+
Summary: Protocol-level client SDK for fetchurl content-addressable cache servers
|
|
5
|
+
Project-URL: Homepage, https://github.com/lucasew/fetchurl
|
|
6
|
+
Project-URL: Repository, https://github.com/lucasew/fetchurl
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/lucasew/fetchurl/issues
|
|
8
|
+
Author: lucasew
|
|
9
|
+
License: MIT
|
|
10
|
+
Keywords: cache,content-addressable,fetchurl,hash,sha256
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
17
|
+
Requires-Python: >=3.8
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# fetchurl
|
|
21
|
+
|
|
22
|
+
Simple caching server for URLs with a hash.
|
|
@@ -0,0 +1,417 @@
|
|
|
1
|
+
"""Fetchurl SDK for Python.
|
|
2
|
+
|
|
3
|
+
Protocol-level client for fetchurl content-addressable cache servers.
|
|
4
|
+
Works with any HTTP library through the Fetcher/AsyncFetcher protocols.
|
|
5
|
+
|
|
6
|
+
Zero dependencies — uses only the Python standard library.
|
|
7
|
+
|
|
8
|
+
Three levels of usage:
|
|
9
|
+
|
|
10
|
+
# 1. One-liner with stdlib
|
|
11
|
+
fetchurl.fetch(UrllibFetcher(), "sha256", hash, urls, output)
|
|
12
|
+
|
|
13
|
+
# 2. Custom HTTP client — implement the Fetcher protocol
|
|
14
|
+
class MyFetcher:
|
|
15
|
+
def get(self, url, headers):
|
|
16
|
+
resp = requests.get(url, headers=headers, stream=True)
|
|
17
|
+
return (resp.status_code, resp.raw)
|
|
18
|
+
|
|
19
|
+
fetchurl.fetch(MyFetcher(), "sha256", hash, urls, output)
|
|
20
|
+
|
|
21
|
+
# 3. Low-level — drive the state machine yourself
|
|
22
|
+
session = FetchSession("sha256", hash, urls)
|
|
23
|
+
while attempt := session.next_attempt():
|
|
24
|
+
# make HTTP request with whatever library you want
|
|
25
|
+
...
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import hashlib
|
|
31
|
+
import os
|
|
32
|
+
import random
|
|
33
|
+
import re
|
|
34
|
+
from collections.abc import AsyncIterator
|
|
35
|
+
from dataclasses import dataclass, field
|
|
36
|
+
from typing import BinaryIO, Protocol, runtime_checkable
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# --- Errors ---
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class FetchUrlError(Exception):
    """Base exception for the fetchurl SDK.

    Every error raised by this package derives from this class, so callers
    can catch ``FetchUrlError`` to handle any SDK failure uniformly.
    """
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class UnsupportedAlgorithmError(FetchUrlError):
    """Raised when the requested hash algorithm is not supported."""

    def __init__(self, algo: str):
        super().__init__(f"unsupported algorithm: {algo}")
        # Keep the offending name around so callers can report it.
        self.algo = algo
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class HashMismatchError(FetchUrlError):
    """Raised when downloaded content does not hash to the expected value."""

    def __init__(self, expected: str, actual: str):
        super().__init__(f"hash mismatch: expected {expected}, got {actual}")
        # Both digests are kept so callers can log or compare them.
        self.expected = expected
        self.actual = actual
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class AllSourcesFailedError(FetchUrlError):
    """Raised when every server and source failed to provide the content."""

    def __init__(self, last_error: Exception | None = None):
        super().__init__("all sources failed")
        # The most recent underlying failure, if any, for diagnostics.
        self.last_error = last_error
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class PartialWriteError(FetchUrlError):
    """Raised when bytes were written before a failure; the output is tainted."""

    def __init__(self, cause: Exception):
        super().__init__(f"partial write: {cause}")
        # The underlying exception that interrupted the transfer.
        self.cause = cause
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# --- Algorithm helpers ---
|
|
80
|
+
|
|
81
|
+
# Hash algorithms accepted by the fetchurl protocol (normalized names).
_SUPPORTED_ALGOS = {"sha1", "sha256", "sha512"}
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def normalize_algo(name: str) -> str:
    """Normalize an algorithm name per spec: lowercase, keep only [a-z0-9]."""
    lowered = name.lower()
    # After lowercasing, the ASCII alphanumerics are exactly [a-z0-9].
    return "".join(c for c in lowered if c.isascii() and c.isalnum())
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def is_supported(algo: str) -> bool:
    """Return True if *algo* names a supported hash algorithm."""
    normalized = normalize_algo(algo)
    return normalized in _SUPPORTED_ALGOS
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# --- SFV helpers (RFC 8941 string lists) ---
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def encode_source_urls(urls: list[str]) -> str:
    """Encode URLs as an RFC 8941 string list for the X-Source-Urls header."""
    quoted: list[str] = []
    for url in urls:
        # Backslashes must be escaped before quotes per RFC 8941.
        escaped = url.replace("\\", "\\\\").replace('"', '\\"')
        quoted.append(f'"{escaped}"')
    return ", ".join(quoted)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def parse_fetchurl_server(value: str) -> list[str]:
    """Parse FETCHURL_SERVER env var (RFC 8941 string list).

    Lenient by design: list members that are not quoted strings are
    skipped, string parameters (e.g. ``;q=0.9``) after a member are
    ignored, and an unterminated string consumes the rest of the input.
    Returns the decoded server URLs in order of appearance.
    """
    results: list[str] = []
    i = 0  # cursor into `value`
    while i < len(value):
        # Skip optional whitespace before a list member.
        while i < len(value) and value[i] in " \t":
            i += 1
        if i >= len(value):
            break
        if value[i] != '"':
            # Not a quoted string: discard this member up to the next comma.
            while i < len(value) and value[i] != ",":
                i += 1
            if i < len(value):
                i += 1  # step past the comma separator
            continue
        i += 1  # consume the opening quote
        s: list[str] = []
        while i < len(value):
            if value[i] == "\\" and i + 1 < len(value):
                # Backslash escape: take the next character literally.
                s.append(value[i + 1])
                i += 2
            elif value[i] == '"':
                i += 1  # closing quote ends the string
                break
            else:
                s.append(value[i])
                i += 1
        results.append("".join(s))
        # Skip any trailing parameters (";key=value") up to the separator.
        while i < len(value) and value[i] != ",":
            i += 1
        if i < len(value):
            i += 1  # step past the comma separator
    return results
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# --- FetchAttempt ---
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@dataclass(frozen=True)
class FetchAttempt:
    """A single fetch attempt with URL and headers.

    Immutable value object produced by FetchSession; the caller performs
    the actual HTTP GET described here.
    """

    # Absolute URL to request (cache-server endpoint or direct source URL).
    url: str
    # Extra request headers (e.g. X-Source-Urls when hitting a cache server).
    headers: dict[str, str] = field(default_factory=dict)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
# --- HashVerifier ---
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class HashVerifier:
    """Wraps a binary writer, computes a running hash, verifies on finish().

    Usage::

        verifier = session.verifier(output_file)
        while chunk := body.read(65536):
            verifier.write(chunk)
        verifier.finish()  # raises HashMismatchError on failure
    """

    def __init__(self, algo: str, expected_hash: str, writer: BinaryIO):
        self._writer = writer
        self._expected = expected_hash
        self._hasher = hashlib.new(normalize_algo(algo))
        self._bytes_written = 0

    @property
    def bytes_written(self) -> int:
        """Total number of bytes successfully written so far."""
        return self._bytes_written

    def write(self, data: bytes) -> int:
        """Write *data* to the underlying writer, hashing what was accepted."""
        written = self._writer.write(data)
        if written is None:
            # Some writers return None for "wrote everything".
            written = len(data)
        # Only hash the bytes the writer actually accepted.
        self._hasher.update(data[:written])
        self._bytes_written += written
        return written

    def finish(self) -> None:
        """Verify the accumulated hash. Raises HashMismatchError on failure."""
        digest = self._hasher.hexdigest()
        if digest != self._expected:
            raise HashMismatchError(self._expected, digest)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
# --- FetchSession ---
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class FetchSession:
    """State machine driving the fetchurl client protocol.

    Cache servers (read from the ``FETCHURL_SERVER`` environment variable)
    are tried first, with the source URLs forwarded via the X-Source-Urls
    header; direct source URLs follow, in random order per spec.

    The caller iterates through attempts, makes HTTP requests with its
    preferred library, and reports results back::

        session = FetchSession("sha256", hash, source_urls)
        while attempt := session.next_attempt():
            # attempt.url and attempt.headers tell you what to request
            ...
            session.report_success()  # or report_partial()
    """

    def __init__(
        self,
        algo: str,
        hash: str,
        source_urls: list[str],
    ):
        servers = parse_fetchurl_server(os.environ.get("FETCHURL_SERVER", ""))
        algo = normalize_algo(algo)
        if not is_supported(algo):
            raise UnsupportedAlgorithmError(algo)

        self._algo = algo
        self._hash = hash
        self._done = False
        self._success = False
        self._attempts: list[FetchAttempt] = []
        self._current = 0

        # Forward the original source URLs to cache servers so they can
        # populate themselves on a miss.
        source_header = encode_source_urls(source_urls) if source_urls else None

        for server in servers:
            endpoint = f"{server.rstrip('/')}/api/fetchurl/{algo}/{hash}"
            extra_headers: dict[str, str] = {}
            if source_header:
                extra_headers["X-Source-Urls"] = source_header
            self._attempts.append(FetchAttempt(url=endpoint, headers=extra_headers))

        # Direct downloads come last, shuffled per spec.
        shuffled = list(source_urls)
        random.shuffle(shuffled)
        self._attempts.extend(FetchAttempt(url=u) for u in shuffled)

    def next_attempt(self) -> FetchAttempt | None:
        """Return the next attempt, or None once the session is finished.

        An attempt that failed without writing bytes needs no reporting;
        simply call next_attempt() again.
        """
        if self._done:
            return None
        if self._current >= len(self._attempts):
            return None
        upcoming = self._attempts[self._current]
        self._current += 1
        return upcoming

    def report_success(self) -> None:
        """Mark the session as successful. Stops further attempts."""
        self._success = True
        self._done = True

    def report_partial(self) -> None:
        """Record that bytes were written before failure. Stops further attempts."""
        self._done = True

    def succeeded(self) -> bool:
        """Whether report_success() was called."""
        return self._success

    def verifier(self, writer: BinaryIO) -> HashVerifier:
        """Create a HashVerifier bound to this session's algo and expected hash."""
        return HashVerifier(self._algo, self._hash, writer)
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
# --- Fetcher protocols ---
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
@runtime_checkable
class Fetcher(Protocol):
    """Sync HTTP client protocol.

    Implement this to plug in any HTTP library.

    Example with requests::

        class RequestsFetcher:
            def get(self, url, headers):
                resp = requests.get(url, headers=headers, stream=True)
                return (resp.status_code, resp.raw)
    """

    def get(self, url: str, headers: dict[str, str]) -> tuple[int, BinaryIO]:
        """Make a GET request. Returns (status_code, readable_body).

        Implementations should not raise on HTTP error statuses; return
        the status code and the body instead (as UrllibFetcher does) —
        fetch() skips any non-200 response and moves on.
        """
        ...
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
@runtime_checkable
class AsyncFetcher(Protocol):
    """Async HTTP client protocol.

    Implement this to plug in any async HTTP library.

    Example with aiohttp::

        class AiohttpFetcher:
            def __init__(self):
                self._session = aiohttp.ClientSession()

            async def get(self, url, headers):
                resp = await self._session.get(url, headers=headers)
                return (resp.status, resp.content.iter_chunked(65536))
    """

    async def get(
        self, url: str, headers: dict[str, str]
    ) -> tuple[int, AsyncIterator[bytes]]:
        """Make a GET request. Returns (status_code, async_body_chunks).

        The second element is an async iterator yielding the body as byte
        chunks; async_fetch() consumes it with ``async for``.
        """
        ...
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
# --- UrllibFetcher (stdlib, zero deps) ---
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
class UrllibFetcher:
|
|
319
|
+
"""Fetcher using urllib.request (stdlib, zero dependencies)."""
|
|
320
|
+
|
|
321
|
+
def get(self, url: str, headers: dict[str, str]) -> tuple[int, BinaryIO]:
|
|
322
|
+
import urllib.error
|
|
323
|
+
import urllib.request
|
|
324
|
+
|
|
325
|
+
req = urllib.request.Request(url, headers=headers)
|
|
326
|
+
try:
|
|
327
|
+
resp = urllib.request.urlopen(req)
|
|
328
|
+
return (resp.status, resp)
|
|
329
|
+
except urllib.error.HTTPError as e:
|
|
330
|
+
return (e.code, e)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
# --- Convenience functions ---
|
|
334
|
+
|
|
335
|
+
# Read size for streaming response bodies in fetch().
_CHUNK_SIZE = 64 * 1024
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def fetch(
    fetcher: Fetcher,
    algo: str,
    hash: str,
    source_urls: list[str],
    out: BinaryIO,
) -> None:
    """High-level sync fetch. Handles the full protocol loop.

    Tries cache servers (from FETCHURL_SERVER) and then the direct source
    URLs, streaming the first successful 200 response into *out* while
    verifying its hash. The response body is always closed after each
    attempt (the previous version leaked it, which can exhaust sockets
    on long fallback chains).

    Raises:
        AllSourcesFailedError: every attempt failed before writing bytes.
        PartialWriteError: bytes were already written to *out* when an
            attempt failed; *out* must be considered tainted.
    """
    session = FetchSession(algo, hash, source_urls)
    last_error: Exception | None = None

    while attempt := session.next_attempt():
        try:
            status, body = fetcher.get(attempt.url, dict(attempt.headers))
        except Exception as e:
            last_error = e
            continue

        try:
            if status != 200:
                last_error = Exception(f"unexpected status {status}")
                continue

            verifier = session.verifier(out)
            try:
                while chunk := body.read(_CHUNK_SIZE):
                    verifier.write(chunk)
                verifier.finish()
                session.report_success()
                return
            except Exception as e:
                last_error = e
                if verifier.bytes_written > 0:
                    session.report_partial()
                    raise PartialWriteError(e) from e
        finally:
            # Always release the response body, even on the success path.
            close = getattr(body, "close", None)
            if callable(close):
                try:
                    close()
                except Exception:
                    pass  # best-effort cleanup; never mask the real outcome

    raise AllSourcesFailedError(last_error)
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
async def async_fetch(
    fetcher: AsyncFetcher,
    algo: str,
    hash: str,
    source_urls: list[str],
    out: BinaryIO,
) -> None:
    """High-level async fetch. Handles the full protocol loop.

    Tries cache servers (from FETCHURL_SERVER) and then the direct source
    URLs, streaming the first successful 200 response into *out* while
    verifying its hash. The chunk iterator is closed (via ``aclose`` when
    available) after each attempt so underlying connections are released.

    Raises:
        AllSourcesFailedError: every attempt failed before writing bytes.
        PartialWriteError: bytes were already written to *out* when an
            attempt failed; *out* must be considered tainted.
    """
    session = FetchSession(algo, hash, source_urls)
    last_error: Exception | None = None

    while attempt := session.next_attempt():
        try:
            status, chunks = await fetcher.get(attempt.url, dict(attempt.headers))
        except Exception as e:
            last_error = e
            continue

        try:
            if status != 200:
                last_error = Exception(f"unexpected status {status}")
                continue

            verifier = session.verifier(out)
            try:
                async for chunk in chunks:
                    verifier.write(chunk)
                verifier.finish()
                session.report_success()
                return
            except Exception as e:
                last_error = e
                if verifier.bytes_written > 0:
                    session.report_partial()
                    raise PartialWriteError(e) from e
        finally:
            # Best-effort close of the async chunk iterator; not all
            # implementations provide aclose(), so probe for it.
            aclose = getattr(chunks, "aclose", None)
            if callable(aclose):
                try:
                    await aclose()
                except Exception:
                    pass

    raise AllSourcesFailedError(last_error)
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "fetchurl-sdk"
|
|
7
|
+
version = "0.1.4"
|
|
8
|
+
description = "Protocol-level client SDK for fetchurl content-addressable cache servers"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
dependencies = []
|
|
12
|
+
license = { text = "MIT" }
|
|
13
|
+
authors = [{ name = "lucasew" }]
|
|
14
|
+
keywords = ["fetchurl", "cache", "content-addressable", "hash", "sha256"]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 3 - Alpha",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Topic :: Internet :: WWW/HTTP",
|
|
21
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[project.urls]
|
|
25
|
+
Homepage = "https://github.com/lucasew/fetchurl"
|
|
26
|
+
Repository = "https://github.com/lucasew/fetchurl"
|
|
27
|
+
"Bug Tracker" = "https://github.com/lucasew/fetchurl/issues"
|
|
28
|
+
|
|
29
|
+
[tool.hatch.build.targets.wheel]
|
|
30
|
+
packages = ["fetchurl"]
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
"""Tests for fetchurl SDK."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import io
|
|
5
|
+
import os
|
|
6
|
+
import unittest
|
|
7
|
+
from http.server import HTTPServer, BaseHTTPRequestHandler
|
|
8
|
+
from threading import Thread
|
|
9
|
+
from unittest.mock import patch
|
|
10
|
+
|
|
11
|
+
import fetchurl
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def sha256hex(data: bytes) -> str:
    """Hex-encoded SHA-256 digest of *data* (test helper)."""
    digest = hashlib.sha256(data)
    return digest.hexdigest()
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TestNormalizeAlgo(unittest.TestCase):
    """normalize_algo lowercases and strips non-alphanumeric characters."""

    def test_lowercase(self):
        self.assertEqual(fetchurl.normalize_algo("SHA-256"), "sha256")

    def test_already_normalized(self):
        self.assertEqual(fetchurl.normalize_algo("sha256"), "sha256")

    def test_strips_non_alnum(self):
        self.assertEqual(fetchurl.normalize_algo("SHA_512"), "sha512")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class TestIsSupported(unittest.TestCase):
    """is_supported accepts sha1/sha256/sha512 in any spelling, rejects others."""

    def test_supported(self):
        self.assertTrue(fetchurl.is_supported("sha256"))
        self.assertTrue(fetchurl.is_supported("SHA-256"))
        self.assertTrue(fetchurl.is_supported("sha1"))
        self.assertTrue(fetchurl.is_supported("sha512"))

    def test_unsupported(self):
        self.assertFalse(fetchurl.is_supported("md5"))
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class TestSFV(unittest.TestCase):
    """RFC 8941 string-list encode/parse helpers."""

    def test_encode(self):
        self.assertEqual(
            fetchurl.encode_source_urls(["https://a.com", "https://b.com"]),
            '"https://a.com", "https://b.com"',
        )

    def test_parse(self):
        parsed = fetchurl.parse_fetchurl_server('"https://a.com", "https://b.com"')
        self.assertEqual(parsed, ["https://a.com", "https://b.com"])

    def test_roundtrip(self):
        # encode -> parse must be the identity for ordinary URLs.
        urls = ["https://cdn.example.com/f.tar.gz", "https://mirror.org/a.tgz"]
        encoded = fetchurl.encode_source_urls(urls)
        decoded = fetchurl.parse_fetchurl_server(encoded)
        self.assertEqual(decoded, urls)

    def test_parse_with_params(self):
        # Parameters after a member (";q=0.9") are ignored by the parser.
        parsed = fetchurl.parse_fetchurl_server('"https://a.com";q=0.9, "https://b.com"')
        self.assertEqual(parsed, ["https://a.com", "https://b.com"])

    def test_empty(self):
        self.assertEqual(fetchurl.parse_fetchurl_server(""), [])
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class TestHashVerifier(unittest.TestCase):
    """HashVerifier passes matching content through and flags mismatches."""

    def test_success(self):
        data = b"hello world"
        h = sha256hex(data)
        out = io.BytesIO()
        v = fetchurl.HashVerifier("sha256", h, out)
        v.write(data)
        self.assertEqual(v.bytes_written, len(data))
        v.finish()  # must not raise for a matching hash
        self.assertEqual(out.getvalue(), data)

    def test_mismatch(self):
        data = b"hello world"
        wrong_hash = sha256hex(b"wrong")
        out = io.BytesIO()
        v = fetchurl.HashVerifier("sha256", wrong_hash, out)
        v.write(data)
        with self.assertRaises(fetchurl.HashMismatchError) as ctx:
            v.finish()
        # The exception carries the expected digest for diagnostics.
        self.assertEqual(ctx.exception.expected, wrong_hash)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class TestFetchSession(unittest.TestCase):
    """FetchSession attempt ordering and terminal-state behavior."""

    def test_unsupported_algo(self):
        with self.assertRaises(fetchurl.UnsupportedAlgorithmError):
            fetchurl.FetchSession("md5", "abc", ["http://src"])

    @patch.dict(os.environ, {"FETCHURL_SERVER": '"http://cache1", "http://cache2"'})
    def test_attempt_ordering(self):
        # Servers from FETCHURL_SERVER come first (in order), then sources.
        h = sha256hex(b"test")
        session = fetchurl.FetchSession(
            "sha256", h, ["http://src1"]
        )

        a1 = session.next_attempt()
        self.assertIsNotNone(a1)
        self.assertTrue(a1.url.startswith("http://cache1/api/fetchurl/sha256/"))
        self.assertIn("X-Source-Urls", a1.headers)

        a2 = session.next_attempt()
        self.assertTrue(a2.url.startswith("http://cache2/api/fetchurl/sha256/"))

        # Direct source attempts carry no extra headers.
        a3 = session.next_attempt()
        self.assertEqual(a3.url, "http://src1")
        self.assertEqual(a3.headers, {})

        self.assertIsNone(session.next_attempt())
        self.assertFalse(session.succeeded())

    @patch.dict(os.environ, {"FETCHURL_SERVER": '"http://cache"'})
    def test_success_stops(self):
        h = sha256hex(b"test")
        session = fetchurl.FetchSession("sha256", h, ["http://src"])
        session.next_attempt()
        session.report_success()
        self.assertTrue(session.succeeded())
        self.assertIsNone(session.next_attempt())

    @patch.dict(os.environ, {"FETCHURL_SERVER": '"http://cache"'})
    def test_partial_stops(self):
        # A partial write ends the session without marking it successful.
        h = sha256hex(b"test")
        session = fetchurl.FetchSession("sha256", h, ["http://src"])
        session.next_attempt()
        session.report_partial()
        self.assertFalse(session.succeeded())
        self.assertIsNone(session.next_attempt())
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class TestFetch(unittest.TestCase):
    """Integration tests using a real HTTP server."""

    @staticmethod
    def _start_server(handler_class) -> tuple[HTTPServer, str]:
        # Port 0 lets the OS pick a free port; the daemon thread dies with
        # the test process even if shutdown() is missed.
        server = HTTPServer(("127.0.0.1", 0), handler_class)
        port = server.server_address[1]
        thread = Thread(target=server.serve_forever, daemon=True)
        thread.start()
        return server, f"http://127.0.0.1:{port}"

    def test_direct_download(self):
        """A direct source URL serving the exact content succeeds."""
        content = b"test content"
        h = sha256hex(content)

        class Handler(BaseHTTPRequestHandler):
            def do_GET(self):
                self.send_response(200)
                self.end_headers()
                self.wfile.write(content)

            def log_message(self, *args):
                pass  # keep test output quiet

        server, url = self._start_server(Handler)
        try:
            out = io.BytesIO()
            # Empty servers (env var not set by default/or empty)
            with patch.dict(os.environ, {}, clear=True):
                fetchurl.fetch(fetchurl.UrllibFetcher(), "sha256", h, [url], out)
            self.assertEqual(out.getvalue(), content)
        finally:
            server.shutdown()

    def test_hash_mismatch_raises_partial(self):
        """Wrong content that was already written taints the output."""
        class Handler(BaseHTTPRequestHandler):
            def do_GET(self):
                self.send_response(200)
                self.end_headers()
                self.wfile.write(b"wrong content")

            def log_message(self, *args):
                pass

        server, url = self._start_server(Handler)
        try:
            out = io.BytesIO()
            with self.assertRaises(fetchurl.PartialWriteError):
                with patch.dict(os.environ, {}, clear=True):
                    fetchurl.fetch(
                        fetchurl.UrllibFetcher(), "sha256", sha256hex(b"right"), [url], out
                    )
        finally:
            server.shutdown()

    def test_all_sources_failed(self):
        """A 404 from the only source raises AllSourcesFailedError."""
        class Handler(BaseHTTPRequestHandler):
            def do_GET(self):
                self.send_response(404)
                self.end_headers()

            def log_message(self, *args):
                pass

        server, url = self._start_server(Handler)
        try:
            out = io.BytesIO()
            with self.assertRaises(fetchurl.AllSourcesFailedError):
                with patch.dict(os.environ, {}, clear=True):
                    fetchurl.fetch(
                        fetchurl.UrllibFetcher(), "sha256", sha256hex(b"x"), [url], out
                    )
        finally:
            server.shutdown()

    def test_server_fallback_to_direct(self):
        """A failing cache server falls through to the direct source URL."""
        content = b"fallback content"
        h = sha256hex(content)

        class BadServer(BaseHTTPRequestHandler):
            def do_GET(self):
                self.send_response(500)
                self.end_headers()

            def log_message(self, *args):
                pass

        class GoodSource(BaseHTTPRequestHandler):
            def do_GET(self):
                self.send_response(200)
                self.end_headers()
                self.wfile.write(content)

            def log_message(self, *args):
                pass

        bad, bad_url = self._start_server(BadServer)
        good, good_url = self._start_server(GoodSource)
        try:
            out = io.BytesIO()
            # Set server via env var
            with patch.dict(os.environ, {"FETCHURL_SERVER": f'"{bad_url}"'}):
                fetchurl.fetch(
                    fetchurl.UrllibFetcher(), "sha256", h, [good_url], out
                )
            self.assertEqual(out.getvalue(), content)
        finally:
            bad.shutdown()
            good.shutdown()
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
# Allow running this module directly: `python test_fetchurl.py`.
if __name__ == "__main__":
    unittest.main()
|