frugon 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- frugon/__init__.py +10 -0
- frugon/_progress.py +324 -0
- frugon/_store.py +185 -0
- frugon/capture.py +503 -0
- frugon/cli.py +1489 -0
- frugon/cost.py +1474 -0
- frugon/data/pricing.json +121 -0
- frugon/data/quality.json +368 -0
- frugon/data/sample_logs.jsonl.gz +0 -0
- frugon/measure.py +1579 -0
- frugon/model_id.py +203 -0
- frugon/pricing.py +571 -0
- frugon/quality.py +833 -0
- frugon/report.py +7840 -0
- frugon/routing.py +312 -0
- frugon-0.1.0.dist-info/METADATA +226 -0
- frugon-0.1.0.dist-info/RECORD +20 -0
- frugon-0.1.0.dist-info/WHEEL +4 -0
- frugon-0.1.0.dist-info/entry_points.txt +2 -0
- frugon-0.1.0.dist-info/licenses/LICENSE +21 -0
frugon/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""frugon — free, local, open-source LLM cost analyzer."""

__version__ = "0.1.0"

# Identifying User-Agent attached to every outbound registry / leaderboard
# request (the pricing and quality refreshes). It is not optional: some hosts —
# notably the Hugging Face datasets-server behind the LMArena quality table —
# answer the stock ``Python-urllib`` agent with HTTP 500, so the refreshes only
# work at all when an explicit agent is sent. Defined once, here, so that both
# fetchers always send the same value.
USER_AGENT = f"frugon/{__version__} (+https://github.com/Rodiun/frugon)"
|
frugon/_progress.py
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
"""frugon live-progress helper — transient feedback on stderr, never stdout.
|
|
2
|
+
|
|
3
|
+
Why this module exists
|
|
4
|
+
----------------------
|
|
5
|
+
A first-time user who runs ``frugon analyze`` on a large log should never stare
|
|
6
|
+
at a silent terminal wondering whether the tool has hung. This module supplies
|
|
7
|
+
a small, self-contained set of progress affordances — a spinner, a determinate
|
|
8
|
+
progress bar, and persisted phase checkpoints — that reassure the user while the
|
|
9
|
+
read / pricing pass runs.
|
|
10
|
+
|
|
11
|
+
The one hard rule
|
|
12
|
+
-----------------
|
|
13
|
+
**Every byte of progress chrome goes to a Rich ``Console(stderr=True)``.** The
|
|
14
|
+
analysis RESULT (the panel, tables, footer, report-written line) stays on
|
|
15
|
+
stdout, untouched. This keeps stdout byte-identical to today, which protects:
|
|
16
|
+
|
|
17
|
+
* ``--report`` (the HTML/Markdown artifact is unaffected),
|
|
18
|
+
* piping (``frugon analyze … | cat`` and ``> file`` see only the result),
|
|
19
|
+
* the deterministic ``--demo`` (the gif/screenshot single source of truth), and
|
|
20
|
+
* every existing stdout-asserting test.
|
|
21
|
+
|
|
22
|
+
Gating
|
|
23
|
+
------
|
|
24
|
+
Progress animates ONLY when **all** of the following hold:
|
|
25
|
+
|
|
26
|
+
* stderr is a TTY (``sys.stderr.isatty()``), AND
|
|
27
|
+
* ``NO_COLOR`` is not set in the environment, AND
|
|
28
|
+
* progress was not explicitly disabled (the ``--no-progress`` flag).
|
|
29
|
+
|
|
30
|
+
Otherwise the helper is a complete no-op: no spinner, no bar, no checkpoints —
|
|
31
|
+
non-interactive / piped / CI runs stay clean.
|
|
32
|
+
|
|
33
|
+
Colour discipline
|
|
34
|
+
-----------------
|
|
35
|
+
Progress chrome is neutral / cyan. Green is reserved for the saving headline in
|
|
36
|
+
the result, so it never appears here.
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
from __future__ import annotations
|
|
40
|
+
|
|
41
|
+
import os
|
|
42
|
+
import sys
|
|
43
|
+
import time
|
|
44
|
+
from collections.abc import Iterator
|
|
45
|
+
from contextlib import contextmanager
|
|
46
|
+
from types import TracebackType
|
|
47
|
+
from typing import TYPE_CHECKING
|
|
48
|
+
|
|
49
|
+
from rich.console import Console
|
|
50
|
+
from rich.progress import (
|
|
51
|
+
BarColumn,
|
|
52
|
+
MofNCompleteColumn,
|
|
53
|
+
Progress,
|
|
54
|
+
SpinnerColumn,
|
|
55
|
+
TaskID,
|
|
56
|
+
TextColumn,
|
|
57
|
+
TimeElapsedColumn,
|
|
58
|
+
TimeRemainingColumn,
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
if TYPE_CHECKING: # pragma: no cover — typing only
|
|
62
|
+
from rich.status import Status
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# ---------------------------------------------------------------------------
|
|
66
|
+
# Gating
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def progress_enabled(*, no_progress: bool) -> bool:
    """Decide whether live progress chrome may be drawn.

    Progress renders only when the caller did not opt out (``--no-progress``),
    the ``NO_COLOR`` environment variable is unset or empty, and stderr reports
    itself as a TTY. If any of those fails the answer is False and the progress
    helpers stay silent. Every call site goes through this one function so the
    rule cannot drift.

    Args:
        no_progress: True when the user explicitly disabled progress.

    Returns:
        True only if all three conditions hold.
    """
    opted_out = no_progress or bool(os.environ.get("NO_COLOR"))
    if opted_out:
        return False
    try:
        interactive = bool(sys.stderr.isatty())
    except (ValueError, AttributeError):  # pragma: no cover — detached/odd stderr
        # A closed stream raises ValueError; a missing stream (None) raises
        # AttributeError. Either way there is no terminal to animate on.
        return False
    return interactive
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# ---------------------------------------------------------------------------
|
|
89
|
+
# Reporter
|
|
90
|
+
# ---------------------------------------------------------------------------
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class ProgressReporter:
    """Reusable progress surface that writes exclusively to a stderr console.

    Obtain one through :func:`progress_reporter` (a context manager) so the
    gating decision and the console wiring live in a single place. A reporter
    built with ``enabled=False`` holds no console and every method returns
    immediately, which lets call sites use it without branching.

    Two kinds of output exist. The spinner, bar and counter are *transient*:
    they are erased when their ``with`` block ends. ``checkpoint``, ``notice``
    and ``blank`` are *persisted*: each leaves one line on screen (for example
    ``✓ Read 56,100 records``). Keep that trail to a handful of lines — it is
    not a log.
    """

    def __init__(self, *, enabled: bool) -> None:
        self.enabled = enabled
        # One dedicated stderr console, created only when progress is on. Even
        # the persisted lines use it — stdout is reserved for the result.
        console: Console | None = None
        if enabled:
            console = Console(stderr=True)
        self._console = console

    # -- persisted: phase checkpoints ---------------------------------------
    def checkpoint(self, message: str) -> None:
        """Emit one persisted ``✓ <message>`` line on stderr.

        Does nothing when the reporter is disabled. *message* should be short,
        e.g. ``"Read 56,100 records"``. The whole line is dim and the tick is
        cyan rather than green, because green is reserved for the saving
        headline in the result.
        """
        console = self._console
        if console is not None:
            console.print(f"[dim][cyan]✓[/cyan] {message}[/dim]")

    # -- persisted: informational notice ------------------------------------
    def notice(self, message: str) -> None:
        """Emit one persisted, dim informational line on stderr.

        Intended for a gentle aside such as warning that a very large log may
        take a moment. It is neither a warning nor a limit and never alters
        what frugon does. Does nothing when the reporter is disabled, so piped
        and CI runs stay silent.
        """
        console = self._console
        if console is not None:
            console.print(f"[dim]{message}[/dim]")

    # -- persisted: blank separator -----------------------------------------
    def blank(self) -> None:
        """Emit a single empty line on the stderr progress console.

        Separates the checkpoint trail from the result that follows on stdout
        so the two do not run together. Does nothing when the reporter is
        disabled.
        """
        console = self._console
        if console is not None:
            console.print()

    # -- transient: spinner (total unknown) ---------------------------------
    @contextmanager
    def spinner(self, message: str) -> Iterator[None]:
        """Display a transient spinner for a phase of unknown length.

        Meant for the read/parse phase, where the record count is not known
        yet (``Reading logs…``). The spinner disappears when the ``with`` block
        exits. When the reporter is disabled the block simply runs.
        """
        console = self._console
        if console is None:
            yield
            return
        live: Status = console.status(
            f"[cyan]{message}[/cyan]", spinner="dots", spinner_style="cyan"
        )
        with live:
            yield

    # -- transient: determinate bar (total known) ---------------------------
    @contextmanager
    def bar(self, message: str, total: int) -> Iterator[ProgressTask]:
        """Display a transient determinate progress bar for a bounded phase.

        The yielded :class:`ProgressTask` exposes ``advance(n=1)``, which the
        caller invokes once per unit of work (for instance per priced record).
        The line shows the message, an ``n/total`` count, the bar, elapsed time
        and the estimated time remaining. It is erased when the ``with`` block
        exits.

        If the reporter is disabled, or *total* is zero or negative, the
        shared null task is yielded instead; its ``advance`` does nothing, so
        the per-record callback stays cheap and the caller never branches.
        """
        console = self._console
        if console is None or total <= 0:
            yield _NULL_TASK
            return
        columns = (
            TextColumn("[cyan]{task.description}[/cyan]"),
            MofNCompleteColumn(),
            BarColumn(complete_style="cyan", finished_style="cyan"),
            TimeElapsedColumn(),
            TimeRemainingColumn(),
        )
        progress = Progress(*columns, console=console, transient=True)
        with progress:
            task_id = progress.add_task(message, total=total)
            yield _RichProgressTask(progress, task_id)

    # -- transient: counter (n/total, no bar) -------------------------------
    @contextmanager
    def counter(self, prefix: str, total: int) -> Iterator[StepCounter]:
        """Display a transient ``prefix n/total · <label>`` spinner line.

        Drives the per-prompt ``--measure`` / ``--judge`` indicator
        (``Sampling prompt 3/5 · gpt-4o-mini``). The yielded
        :class:`StepCounter` exposes ``step(label)``, to be called as each
        prompt begins. If the reporter is disabled, or *total* is zero or
        negative, the shared null counter is yielded and ``step`` does nothing.
        """
        console = self._console
        if console is None or total <= 0:
            yield _NULL_COUNTER
            return
        columns = (
            SpinnerColumn(spinner_name="dots", style="cyan"),
            TextColumn("[cyan]{task.description}[/cyan]"),
        )
        progress = Progress(*columns, console=console, transient=True)
        with progress:
            task_id = progress.add_task(prefix, total=total)
            yield _RichStepCounter(progress, task_id, prefix, total)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
# ---------------------------------------------------------------------------
|
|
225
|
+
# Progress-task abstractions (advance per unit of work)
|
|
226
|
+
# ---------------------------------------------------------------------------
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class ProgressTask:
    """Handle used to advance a determinate bar; this base class is the no-op."""

    def advance(self, n: int = 1) -> None:  # noqa: D401 — simple verb
        """Move the bar forward by *n* units; the base implementation ignores it."""
        return None
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
class _RichProgressTask(ProgressTask):
    """Advance handle that forwards to one task of a Rich :class:`Progress`."""

    def __init__(self, progress: Progress, task_id: TaskID) -> None:
        # Keep both the display and the id of the task this handle drives.
        self._progress, self._task_id = progress, task_id

    def advance(self, n: int = 1) -> None:
        """Advance the bound Rich task by *n* units."""
        display = self._progress
        display.advance(self._task_id, n)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
_NULL_TASK = ProgressTask()
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
class StepCounter:
    """Handle used to step an ``n/total · label`` counter; this base is the no-op."""

    def step(self, label: str = "") -> None:  # noqa: D401 — simple verb
        """Record that a step is starting, with an optional label; ignored here."""
        return None
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
class _RichStepCounter(StepCounter):
    """Step handle that rewrites the description of a Rich spinner task."""

    def __init__(self, progress: Progress, task_id: TaskID, prefix: str, total: int) -> None:
        self._progress, self._task_id = progress, task_id
        self._prefix, self._total = prefix, total
        # Number of steps that have been started so far.
        self._done = 0

    def step(self, label: str = "") -> None:
        """Begin the next step and redraw the line as ``prefix n/total[ · label]``."""
        already_started = self._done
        self._done = already_started + 1
        suffix = f" · {label}" if label else ""
        # ``completed`` counts the steps before this one: the step being
        # announced has only just begun.
        self._progress.update(
            self._task_id,
            description=f"{self._prefix} {self._done}/{self._total}{suffix}",
            completed=already_started,
        )
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
_NULL_COUNTER = StepCounter()
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
# ---------------------------------------------------------------------------
|
|
279
|
+
# Entry-point context manager
|
|
280
|
+
# ---------------------------------------------------------------------------
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
@contextmanager
def progress_reporter(*, no_progress: bool) -> Iterator[ProgressReporter]:
    """Yield a :class:`ProgressReporter` whose state follows :func:`progress_enabled`.

    This is the one entry point call sites should use: wrap the command's work
    in ``with progress_reporter(no_progress=no_progress) as progress:`` and then
    call ``progress.spinner(...)``, ``progress.bar(...)`` or
    ``progress.checkpoint(...)``. If the gate is closed, the yielded reporter
    is disabled and draws nothing.
    """
    enabled = progress_enabled(no_progress=no_progress)
    reporter = ProgressReporter(enabled=enabled)
    yield reporter
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
# ---------------------------------------------------------------------------
|
|
296
|
+
# Elapsed timing helper (used for the "Priced in 4.2s" checkpoint)
|
|
297
|
+
# ---------------------------------------------------------------------------
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
class Stopwatch:
    """Minimal stopwatch for the duration shown in phase checkpoint lines.

    ``elapsed`` is 0.0 until the ``with`` block has exited; it is written once,
    on exit, as the difference of two ``time.perf_counter()`` readings.

    Usage::

        with Stopwatch() as sw:
            ... work ...
        reporter.checkpoint(f"Priced in {sw.elapsed:.1f}s")
    """

    def __init__(self) -> None:
        self._start = 0.0
        self.elapsed = 0.0

    def __enter__(self) -> Stopwatch:
        """Record the start time and return the stopwatch itself."""
        self._start = time.perf_counter()
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        """Store the elapsed seconds; an exception from the block still propagates."""
        stopped_at = time.perf_counter()
        self.elapsed = stopped_at - self._start
|
frugon/_store.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""frugon._store — shared persistence helpers for pricing and quality modules.
|
|
2
|
+
|
|
3
|
+
Provides atomic JSON writes, first-run seeding, and fetch-URL validation
|
|
4
|
+
used by both pricing.py and quality.py to eliminate code duplication.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import shutil
|
|
11
|
+
import sys
|
|
12
|
+
import time
|
|
13
|
+
import urllib.error
|
|
14
|
+
import urllib.request
|
|
15
|
+
from collections.abc import Callable
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Any
|
|
18
|
+
from urllib.parse import urlsplit
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def seed_if_missing(user_path: Path, seed_path: Path) -> None:
    """Seed *user_path* from *seed_path* on first run.

    If *user_path* already exists nothing happens. Otherwise its parent
    directories are created and the seed file is copied into place.

    The copy is best-effort: an ``OSError`` (for example an unwritable data
    directory) never aborts startup. It is reported as a one-line warning on
    stderr so the problem shows up here rather than later as mysteriously
    empty tables.
    """
    if not user_path.exists():
        try:
            user_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(seed_path, user_path)
        except OSError as exc:
            warning = (
                f"frugon: WARNING could not seed {user_path} ({exc}); "
                "using the bundled data instead."
            )
            print(warning, file=sys.stderr)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def load_json_or_empty(user_path: Path, seed_path: Path) -> dict[str, Any]:
    """Load a JSON object from *user_path*, or from *seed_path* if it is absent.

    The seed is consulted only when *user_path* does not exist; a user file
    that exists but cannot be used is NOT replaced by the seed.

    Args:
        user_path: Preferred file, read whenever it exists.
        seed_path: Fallback file, read only when *user_path* is missing.

    Returns:
        The decoded top-level JSON object. An empty dict is returned instead
        when neither file exists, when the chosen file cannot be read, when it
        is not valid UTF-8 or not valid JSON, or when its top-level value is
        not an object — so callers degrade gracefully without raising.
    """
    if user_path.exists():
        read_path = user_path
    elif seed_path.exists():
        read_path = seed_path
    else:
        return {}
    try:
        with read_path.open(encoding="utf-8") as fh:
            raw: Any = json.load(fh)
    except (OSError, ValueError):
        # ValueError is the common base of json.JSONDecodeError (malformed
        # JSON) and UnicodeDecodeError (bytes that are not UTF-8). Catching
        # only JSONDecodeError let the latter escape.
        return {}
    return raw if isinstance(raw, dict) else {}
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def atomic_write_json(
    path: Path,
    payload: dict[str, Any],
    *,
    sort_keys: bool = False,
) -> None:
    """Write *payload* to *path* as JSON via a write-then-replace sequence.

    The JSON text (2-space indent) is written to a temporary sibling named
    ``<file name>.tmp`` and then moved over *path* with ``Path.replace``, so
    *path* is never left holding a partially written document.

    The temporary name is built by APPENDING ``.tmp`` to the full file name
    (``pricing.json`` -> ``pricing.json.tmp``). Replacing the suffix instead
    would give ``pricing.tmp``, which clobbers an unrelated sibling of that
    name, collides between targets that share a stem, and is the target itself
    when *path* already ends in ``.tmp``.

    Args:
        path: Destination file. Missing parent directories are created.
        payload: JSON-serialisable mapping to write.
        sort_keys: Passed through to ``json.dumps``.

    Raises:
        OSError: If creating the directory, writing, or replacing fails. The
            temporary file is removed before the error is re-raised; callers
            that need a domain-specific error should wrap ``except OSError``.
        TypeError: If *payload* is not JSON-serialisable (raised by
            ``json.dumps`` before anything is written).
    """
    # Serialise first so a bad payload fails before the filesystem is touched.
    text = json.dumps(payload, indent=2, sort_keys=sort_keys)
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp = path.with_name(f"{path.name}.tmp")
    try:
        tmp.write_text(text, encoding="utf-8")
        tmp.replace(path)
    except OSError:
        tmp.unlink(missing_ok=True)
        raise
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def validate_fetch_url(url: str, allowed_hosts: frozenset[str]) -> None:
    """Reject *url* unless it is an ``https://`` URL for an allowed host.

    Two checks run in order: the string must start with the literal prefix
    ``https://`` (case-sensitive), and the hostname that ``urlsplit`` extracts
    must be a member of *allowed_hosts*. A URL with no hostname is treated as
    the empty host. This limits outbound update fetches to the known upstream
    hosts; it inspects only the URL string it is given.

    Args:
        url: The URL about to be fetched.
        allowed_hosts: Hostnames that may be contacted.

    Raises:
        ValueError: If either check fails.
    """
    uses_https = url.startswith("https://")
    if not uses_https:
        raise ValueError(f"Update URL must use HTTPS; got: {url!r}")

    parsed_host = urlsplit(url).hostname
    host = parsed_host if parsed_host else ""
    if host in allowed_hosts:
        return

    permitted = sorted(allowed_hosts)
    raise ValueError(f"Update URL host {host!r} is not in the allowed list {permitted!r}")
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def fetch_url_with_retry(
    url: str,
    *,
    user_agent: str,
    max_bytes: int,
    timeout: int = 30,
    max_retries: int = 4,
    backoff_base: float = 1.0,
    on_failure: Callable[[Exception], Exception],
) -> bytes:
    """Fetch *url* with bounded retry on transient failures, returning the body.

    Sends an explicit ``User-Agent`` (some hosts reject the default urllib agent
    with a 5xx). HTTP 429, HTTP 5xx and transient ``OSError`` failures (which
    include ``URLError``) are retried with exponential backoff of
    ``backoff_base * 2**attempt`` seconds. Any other HTTP error status is
    treated as permanent and is NOT retried.

    When a retryable HTTP error carries a ``Retry-After`` header holding a
    number of seconds, that value replaces the computed backoff. The header is
    server-controlled, so it is honoured only when it parses to a finite,
    non-negative number; anything else (an HTTP-date, a negative number, NaN,
    infinity) falls back to the computed backoff instead of reaching
    ``time.sleep`` and raising there.

    Budget: *max_retries* retries after the initial attempt, i.e. at most
    ``max_retries + 1`` total requests. At most *max_bytes* of the body are
    read; a longer body is truncated without error.

    On exhaustion of the retry budget OR a non-retryable error, *on_failure* is
    called with the triggering exception and its return value is raised (chained
    to the trigger), so each caller can produce its own domain exception.

    NOTE(review): ``urllib.request.urlopen`` follows HTTP redirects with its
    default opener; a redirect target is not re-validated here — confirm this
    is acceptable for the allowed hosts.

    Args:
        url: Absolute URL to fetch (caller validates host/scheme beforehand).
        user_agent: Value for the outbound ``User-Agent`` header.
        max_bytes: Maximum number of body bytes to read.
        timeout: Per-request socket timeout in seconds.
        max_retries: Retries allowed after the initial attempt; must be >= 0.
        backoff_base: Base backoff in seconds; doubles each attempt.
        on_failure: Maps the triggering exception to the domain exception to raise.

    Returns:
        The response body, capped at *max_bytes*.

    Raises:
        ValueError: If *max_retries* is negative (no request is made).
        Exception: Whatever *on_failure* returns, on a permanent failure or
            once the retry budget is spent.
    """
    if max_retries < 0:
        # A negative budget would skip the loop entirely and leave nothing to
        # return or report; fail loudly instead of relying on an assert.
        raise ValueError(f"max_retries must be >= 0; got: {max_retries!r}")

    for attempt in range(max_retries + 1):  # attempt 0 = first try
        wait = backoff_base * (2**attempt)
        try:
            with urllib.request.urlopen(
                urllib.request.Request(url, headers={"User-Agent": user_agent}),
                timeout=timeout,
            ) as resp:
                return resp.read(max_bytes)  # type: ignore[no-any-return]
        except urllib.error.HTTPError as exc:
            # 429 (rate limit) and 5xx (transient server errors) are retryable;
            # other statuses (client errors, e.g. 404) are permanent.
            retryable = exc.code == 429 or exc.code >= 500
            if not retryable or attempt >= max_retries:
                raise on_failure(exc) from exc
            # Check the headers object's PRESENCE, not truthiness:
            # http.client.HTTPMessage defines __len__, so a present-but-empty
            # headers object is falsy.
            retry_after_raw: Any = (
                exc.headers.get("Retry-After") if exc.headers is not None else None
            )
            if retry_after_raw is not None:
                try:
                    hinted = float(retry_after_raw)
                except (ValueError, TypeError):
                    hinted = None
                # NaN fails both comparisons, so it is rejected here as well.
                if hinted is not None and 0 <= hinted < float("inf"):
                    wait = hinted
        except (urllib.error.URLError, OSError) as exc:
            if attempt >= max_retries:
                raise on_failure(exc) from exc
        time.sleep(wait)

    # Unreachable: with max_retries >= 0 the final attempt returns or raises.
    raise AssertionError("unreachable: retry loop ended without a result")  # pragma: no cover
|