frugon 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
frugon/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ """frugon — free, local, open-source LLM cost analyzer."""
2
+
3
# Package version string; it is interpolated into USER_AGENT below, so the two
# can never drift apart.
__version__ = "0.1.0"

# Sent on every outbound registry / leaderboard fetch (pricing + quality
# refresh). Some hosts — notably the Hugging Face datasets-server backing the
# LMArena quality table — reject the default ``Python-urllib`` agent with HTTP
# 500, so an explicit, identifying User-Agent is required for the refreshes to
# work at all. Kept here as the single source so both fetchers stay in lockstep.
# Format: "frugon/<version> (+<project URL>)".
USER_AGENT = f"frugon/{__version__} (+https://github.com/Rodiun/frugon)"
frugon/_progress.py ADDED
@@ -0,0 +1,324 @@
1
+ """frugon live-progress helper — transient feedback on stderr, never stdout.
2
+
3
+ Why this module exists
4
+ ----------------------
5
+ A first-time user who runs ``frugon analyze`` on a large log should never stare
6
+ at a silent terminal wondering whether the tool has hung. This module supplies
7
+ a small, self-contained set of progress affordances — a spinner, a determinate
8
+ progress bar, and persisted phase checkpoints — that reassure the user while the
9
+ read / pricing pass runs.
10
+
11
+ The one hard rule
12
+ -----------------
13
+ **Every byte of progress chrome goes to a Rich ``Console(stderr=True)``.** The
14
+ analysis RESULT (the panel, tables, footer, report-written line) stays on
15
+ stdout, untouched. This keeps stdout byte-identical to today, which protects:
16
+
17
+ * ``--report`` (the HTML/Markdown artifact is unaffected),
18
+ * piping (``frugon analyze … | cat`` and ``> file`` see only the result),
19
+ * the deterministic ``--demo`` (the gif/screenshot single source of truth), and
20
+ * every existing stdout-asserting test.
21
+
22
+ Gating
23
+ ------
24
+ Progress animates ONLY when **all** of the following hold:
25
+
26
+ * stderr is a TTY (``sys.stderr.isatty()``), AND
27
+ * ``NO_COLOR`` is not set in the environment, AND
28
+ * progress was not explicitly disabled (the ``--no-progress`` flag).
29
+
30
+ Otherwise the helper is a complete no-op: no spinner, no bar, no checkpoints —
31
+ non-interactive / piped / CI runs stay clean.
32
+
33
+ Colour discipline
34
+ -----------------
35
+ Progress chrome is neutral / cyan. Green is reserved for the saving headline in
36
+ the result, so it never appears here.
37
+ """
38
+
39
+ from __future__ import annotations
40
+
41
+ import os
42
+ import sys
43
+ import time
44
+ from collections.abc import Iterator
45
+ from contextlib import contextmanager
46
+ from types import TracebackType
47
+ from typing import TYPE_CHECKING
48
+
49
+ from rich.console import Console
50
+ from rich.progress import (
51
+ BarColumn,
52
+ MofNCompleteColumn,
53
+ Progress,
54
+ SpinnerColumn,
55
+ TaskID,
56
+ TextColumn,
57
+ TimeElapsedColumn,
58
+ TimeRemainingColumn,
59
+ )
60
+
61
+ if TYPE_CHECKING: # pragma: no cover — typing only
62
+ from rich.status import Status
63
+
64
+
65
+ # ---------------------------------------------------------------------------
66
+ # Gating
67
+ # ---------------------------------------------------------------------------
68
+
69
+
70
def progress_enabled(*, no_progress: bool) -> bool:
    """Decide whether live progress chrome may be drawn.

    Progress renders only when every gate is open:

    * the caller did not ask for ``--no-progress``,
    * ``NO_COLOR`` is absent from the environment (or set to an empty string),
    * ``sys.stderr`` reports itself as a TTY.

    This is the single place the rule lives, so all call sites agree.

    Args:
        no_progress: True when the user explicitly disabled progress output.

    Returns:
        True if progress should animate, False if the helper must stay silent.
    """
    opted_out = no_progress or bool(os.environ.get("NO_COLOR"))
    if opted_out:
        return False
    try:
        interactive = bool(sys.stderr.isatty())
    except (ValueError, AttributeError):  # pragma: no cover — detached/odd stderr
        # A closed stream raises ValueError; a missing stream (None) has no
        # ``isatty`` attribute. Either way there is nothing to animate on.
        interactive = False
    return interactive
86
+
87
+
88
+ # ---------------------------------------------------------------------------
89
+ # Reporter
90
+ # ---------------------------------------------------------------------------
91
+
92
+
93
class ProgressReporter:
    """Progress surface that writes exclusively to a stderr Rich console.

    Build one through :func:`progress_reporter` so the enable/disable decision
    is made in a single place. A disabled reporter holds no console at all and
    every method returns immediately, which lets call sites use it without
    any ``if enabled`` branching.

    Two kinds of output exist:

    * *transient* — :meth:`spinner`, :meth:`bar` and :meth:`counter` clear
      themselves when their ``with`` block ends;
    * *persisted* — :meth:`checkpoint`, :meth:`notice` and :meth:`blank` print
      a line that stays on screen (e.g. ``✓ Read 56,100 records``).

    NOTE(review): message text is interpolated into Rich markup strings as-is,
    so square brackets in a caller-supplied message are presumably interpreted
    as markup — confirm call sites never pass untrusted text.
    """

    def __init__(self, *, enabled: bool) -> None:
        """Create the reporter; a stderr console is built only when *enabled*."""
        self.enabled = enabled
        # stdout carries only the analysis result, so all progress output —
        # checkpoint lines included — is bound to a dedicated stderr console.
        self._console: Console | None = None
        if enabled:
            self._console = Console(stderr=True)

    # -- persisted lines ------------------------------------------------------
    def checkpoint(self, message: str) -> None:
        """Print a persisted, dim ``✓ message`` line on stderr.

        The mark is cyan rather than green: green is reserved for the saving
        headline in the result. Does nothing when the reporter is disabled.
        """
        console = self._console
        if console is not None:
            console.print(f"[dim][cyan]✓[/cyan] {message}[/dim]")

    def notice(self, message: str) -> None:
        """Print a persisted, dim informational line on stderr.

        Intended for a gentle aside (e.g. "a very large log may take a
        moment"); it only prints and changes nothing else. Does nothing when
        the reporter is disabled.
        """
        console = self._console
        if console is not None:
            console.print(f"[dim]{message}[/dim]")

    def blank(self) -> None:
        """Print a single empty line on stderr.

        Visually separates the checkpoint trail from the result that follows
        on stdout. Does nothing when the reporter is disabled.
        """
        console = self._console
        if console is not None:
            console.print()

    # -- transient indicators -------------------------------------------------
    @contextmanager
    def spinner(self, message: str) -> Iterator[None]:
        """Show a cyan ``dots`` spinner for the duration of the ``with`` block.

        For phases whose size is not known up front (``Reading logs…``).
        When disabled the block simply runs with no output.
        """
        console = self._console
        if console is None:
            yield
        else:
            with console.status(
                f"[cyan]{message}[/cyan]", spinner="dots", spinner_style="cyan"
            ):
                yield

    @contextmanager
    def bar(self, message: str, total: int) -> Iterator[ProgressTask]:
        """Show a transient determinate bar and yield its advance handle.

        The bar displays the message, an ``n/total`` count, the bar itself,
        elapsed time and remaining time, and disappears when the ``with``
        block exits. The caller invokes ``advance(n=1)`` on the yielded
        :class:`ProgressTask` once per unit of work.

        When the reporter is disabled or *total* is not positive, the shared
        null task is yielded instead, so ``advance`` costs nothing and the
        call site needs no branch.
        """
        console = self._console
        if console is None or total <= 0:
            yield _NULL_TASK
            return
        columns = (
            TextColumn("[cyan]{task.description}[/cyan]"),
            MofNCompleteColumn(),
            BarColumn(complete_style="cyan", finished_style="cyan"),
            TimeElapsedColumn(),
            TimeRemainingColumn(),
        )
        with Progress(*columns, console=console, transient=True) as live:
            handle = live.add_task(message, total=total)
            yield _RichProgressTask(live, handle)

    @contextmanager
    def counter(self, prefix: str, total: int) -> Iterator[StepCounter]:
        """Show a transient ``prefix n/total · <label>`` spinner line.

        Serves the per-prompt ``--measure`` / ``--judge`` indicator
        (``Sampling prompt 3/5 · gpt-4o-mini``). The caller invokes
        ``step(label)`` on the yielded :class:`StepCounter` as each prompt
        begins. When the reporter is disabled or *total* is not positive, the
        shared null counter is yielded instead.
        """
        console = self._console
        if console is None or total <= 0:
            yield _NULL_COUNTER
            return
        columns = (
            SpinnerColumn(spinner_name="dots", style="cyan"),
            TextColumn("[cyan]{task.description}[/cyan]"),
        )
        with Progress(*columns, console=console, transient=True) as live:
            handle = live.add_task(prefix, total=total)
            yield _RichStepCounter(live, handle, prefix, total)
222
+
223
+
224
+ # ---------------------------------------------------------------------------
225
+ # Progress-task abstractions (advance per unit of work)
226
+ # ---------------------------------------------------------------------------
227
+
228
+
229
class ProgressTask:
    """Handle a caller uses to advance a determinate bar.

    This base class doubles as the null implementation: its ``advance`` does
    nothing, which is what a disabled reporter hands out.
    """

    def advance(self, n: int = 1) -> None:  # noqa: D401 — simple verb
        """Advance the bar by *n* units; the null implementation ignores it."""
        return None


class _RichProgressTask(ProgressTask):
    """Advance handle that forwards to one task of a Rich :class:`Progress`."""

    def __init__(self, progress: Progress, task_id: TaskID) -> None:
        """Remember the live *progress* display and the *task_id* to drive."""
        self._task_id = task_id
        self._progress = progress

    def advance(self, n: int = 1) -> None:
        """Forward *n* completed units to the underlying Rich task."""
        live, handle = self._progress, self._task_id
        live.advance(handle, n)


# Shared do-nothing task yielded by a disabled reporter (or a non-positive total).
_NULL_TASK = ProgressTask()
248
+
249
+
250
class StepCounter:
    """Handle a caller uses to tick an ``n/total · label`` counter.

    This base class doubles as the null implementation: its ``step`` does
    nothing, which is what a disabled reporter hands out.
    """

    def step(self, label: str = "") -> None:  # noqa: D401 — simple verb
        """Mark one step as beginning; the null implementation ignores it."""
        return None


class _RichStepCounter(StepCounter):
    """Step handle that rewrites the description of a Rich spinner task."""

    def __init__(self, progress: Progress, task_id: TaskID, prefix: str, total: int) -> None:
        """Bind the live display, its task, the text prefix and the step total."""
        self._progress = progress
        self._task_id = task_id
        self._prefix = prefix
        self._total = total
        self._done = 0  # number of steps that have begun so far

    def step(self, label: str = "") -> None:
        """Announce the next step as ``prefix n/total`` plus an optional label.

        The step that is beginning is not finished yet, so the task's
        ``completed`` value is the count of steps that began before this one.
        """
        already_begun = self._done
        self._done = already_begun + 1
        pieces = [f"{self._prefix} {self._done}/{self._total}"]
        if label:
            pieces.append(label)
        self._progress.update(
            self._task_id,
            description=" · ".join(pieces),
            completed=already_begun,
        )


# Shared do-nothing counter yielded by a disabled reporter (or a non-positive total).
_NULL_COUNTER = StepCounter()
276
+
277
+
278
+ # ---------------------------------------------------------------------------
279
+ # Entry-point context manager
280
+ # ---------------------------------------------------------------------------
281
+
282
+
283
@contextmanager
def progress_reporter(*, no_progress: bool) -> Iterator[ProgressReporter]:
    """Yield a :class:`ProgressReporter` configured by :func:`progress_enabled`.

    This is the one entry point call sites need::

        with progress_reporter(no_progress=no_progress) as progress:
            with progress.spinner("Reading logs…"):
                ...
            progress.checkpoint("Read 56,100 records")

    If the gate is closed the yielded reporter is disabled, so every call on
    it is a no-op and nothing is rendered.

    Args:
        no_progress: True when the user passed ``--no-progress``.
    """
    should_render = progress_enabled(no_progress=no_progress)
    reporter = ProgressReporter(enabled=should_render)
    yield reporter
293
+
294
+
295
+ # ---------------------------------------------------------------------------
296
+ # Elapsed timing helper (used for the "Priced in 4.2s" checkpoint)
297
+ # ---------------------------------------------------------------------------
298
+
299
+
300
class Stopwatch:
    """Minimal context-manager stopwatch for phase-duration checkpoint lines.

    ``elapsed`` is 0.0 until the ``with`` block exits, then holds the block's
    duration in seconds as measured by ``time.perf_counter()``.

    Usage::

        with Stopwatch() as sw:
            ... work ...
        reporter.checkpoint(f"Priced in {sw.elapsed:.1f}s")
    """

    def __init__(self) -> None:
        """Start with no measurement recorded."""
        self.elapsed = 0.0
        self._start = 0.0

    def __enter__(self) -> Stopwatch:
        """Record the start instant and return the stopwatch itself."""
        self._start = time.perf_counter()
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        """Store the elapsed seconds; any in-flight exception keeps propagating."""
        finished_at = time.perf_counter()
        self.elapsed = finished_at - self._start
frugon/_store.py ADDED
@@ -0,0 +1,185 @@
1
+ """frugon._store — shared persistence helpers for pricing and quality modules.
2
+
3
+ Provides atomic JSON writes, first-run seeding, and fetch-URL validation
4
+ used by both pricing.py and quality.py to eliminate code duplication.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import shutil
11
+ import sys
12
+ import time
13
+ import urllib.error
14
+ import urllib.request
15
+ from collections.abc import Callable
16
+ from pathlib import Path
17
+ from typing import Any
18
+ from urllib.parse import urlsplit
19
+
20
+
21
def seed_if_missing(user_path: Path, seed_path: Path) -> None:
    """Seed *user_path* from *seed_path* on first run.

    If *user_path* already exists nothing happens. Otherwise its parent
    directory is created as needed and the seed is copied there with
    ``shutil.copy2``.

    The operation is best-effort: an ``OSError`` (for example an unwritable
    data directory or a missing seed) never propagates. Instead a one-line
    warning is printed to stderr so the problem is visible immediately rather
    than surfacing later as unexplained empty tables.

    Args:
        user_path: Destination file in the user's data directory.
        seed_path: Bundled seed file to copy from.
    """
    if not user_path.exists():
        try:
            destination_dir = user_path.parent
            destination_dir.mkdir(parents=True, exist_ok=True)
            shutil.copy2(seed_path, user_path)
        except OSError as exc:
            warning = (
                f"frugon: WARNING could not seed {user_path} ({exc}); "
                "using the bundled data instead."
            )
            print(warning, file=sys.stderr)
41
+
42
+
43
def load_json_or_empty(user_path: Path, seed_path: Path) -> dict[str, Any]:
    """Load a JSON object from *user_path*, or from *seed_path* if it is absent.

    The file to read is chosen by existence only: *user_path* wins when it
    exists, otherwise *seed_path* is used, and if neither exists the result is
    an empty dict. A *user_path* that exists but cannot be read or parsed does
    NOT fall back to the seed — it yields an empty dict.

    This function never raises for I/O or parse problems, so callers degrade
    gracefully.

    Args:
        user_path: Preferred file in the user's data directory.
        seed_path: Bundled fallback used only when *user_path* does not exist.

    Returns:
        The decoded top-level JSON object, or ``{}`` when no file exists, the
        file cannot be read, its bytes are not valid UTF-8, it is not valid
        JSON, or its top-level value is not an object.
    """
    if user_path.exists():
        read_path = user_path
    elif seed_path.exists():
        read_path = seed_path
    else:
        return {}
    try:
        with read_path.open(encoding="utf-8") as fh:
            raw: Any = json.load(fh)
    except (OSError, ValueError, RecursionError):
        # ValueError covers both json.JSONDecodeError (malformed JSON) and
        # UnicodeDecodeError (bytes that are not UTF-8); catching only
        # JSONDecodeError let a mis-encoded file crash the caller.
        # RecursionError is what the decoder raises for absurdly deep nesting.
        return {}
    return raw if isinstance(raw, dict) else {}
63
+
64
+
65
def atomic_write_json(
    path: Path,
    payload: dict[str, Any],
    *,
    sort_keys: bool = False,
) -> None:
    """Serialise *payload* as indented JSON and move it into place at *path*.

    The JSON text is first written to a sibling staging file (same name with
    the suffix replaced by ``.tmp``) and then renamed over *path*, so readers
    never observe a half-written file. Missing parent directories are created.

    Args:
        path: Final destination of the JSON document.
        payload: Mapping to serialise (2-space indent, UTF-8).
        sort_keys: Forwarded to ``json.dumps`` to emit keys in sorted order.

    Raises:
        OSError: If writing or renaming fails. The staging file is removed
            before the error is re-raised; callers wanting a domain-specific
            error should wrap the call in ``except OSError``.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    staging = path.with_suffix(".tmp")
    try:
        document = json.dumps(payload, indent=2, sort_keys=sort_keys)
        staging.write_text(document, encoding="utf-8")
        staging.replace(path)
    except OSError:
        # Never leave a stray staging file behind on failure.
        staging.unlink(missing_ok=True)
        raise
86
+
87
+
88
def validate_fetch_url(url: str, allowed_hosts: frozenset[str]) -> None:
    """Reject *url* unless it is an ``https://`` URL on an allowed host.

    Two checks are applied to the URL string itself, in order:

    1. it must start with the literal prefix ``https://``;
    2. the hostname parsed by ``urlsplit`` (empty string when absent) must be
       a member of *allowed_hosts*.

    This keeps outbound update fetches pointed at the known upstream hosts.

    Args:
        url: The URL about to be fetched.
        allowed_hosts: Hostnames that update fetches may target.

    Raises:
        ValueError: If either check fails.
    """
    is_https = url.startswith("https://")
    if not is_https:
        raise ValueError(f"Update URL must use HTTPS; got: {url!r}")
    parsed_host = urlsplit(url).hostname
    host = parsed_host if parsed_host else ""
    if host in allowed_hosts:
        return
    raise ValueError(
        f"Update URL host {host!r} is not in the allowed list "
        f"{sorted(allowed_hosts)!r}"
    )
102
+
103
+
104
def fetch_url_with_retry(
    url: str,
    *,
    user_agent: str,
    max_bytes: int,
    timeout: int = 30,
    max_retries: int = 4,
    backoff_base: float = 1.0,
    on_failure: Callable[[Exception], Exception],
) -> bytes:
    """Fetch *url* with bounded retry on transient failures, returning the body.

    Sends an explicit ``User-Agent`` (some hosts reject the default urllib agent
    with a 5xx). Retries on HTTP 429, HTTP 5xx, and transient ``OSError``
    (which includes ``URLError``) with exponential backoff
    (``backoff_base * 2**attempt`` seconds, never less than zero). When a
    429/5xx carries a usable ``Retry-After`` header — a finite, non-negative
    number of seconds — that value overrides the computed backoff; an absent
    or unusable header is ignored. A 4xx other than 429 is a permanent client
    error and is NOT retried.

    Budget: *max_retries* retries after the initial attempt, i.e. at most
    ``max_retries + 1`` total requests. A negative *max_retries* is treated as
    zero, so the initial attempt is always made. Reads at most *max_bytes* of
    the body.

    On exhaustion of the retry budget OR a non-retryable error, the supplied
    *on_failure* callable is invoked with the triggering exception and its return
    value is raised — letting each caller produce its own domain exception and
    message (e.g. distinguishing an HTTP failure from a network failure).

    Args:
        url: Absolute URL to fetch (caller validates host/scheme beforehand).
        user_agent: Value for the outbound ``User-Agent`` header.
        max_bytes: Maximum number of body bytes to read.
        timeout: Per-request socket timeout in seconds.
        max_retries: Retries allowed after the initial attempt.
        backoff_base: Base backoff in seconds; doubles each attempt.
        on_failure: Maps the triggering exception to the domain exception to raise.

    Returns:
        The response body, capped at *max_bytes*.

    Raises:
        Exception: Whatever *on_failure* returns, chained to the triggering
            ``HTTPError`` / ``OSError``.
    """
    # Clamp so a negative budget still performs the initial request instead of
    # skipping the loop and falling through with nothing to report.
    retries = max(max_retries, 0)
    for attempt in range(retries + 1):  # attempt 0 = first try
        # Clamped at zero: time.sleep() raises ValueError on a negative value,
        # which would escape past on_failure.
        backoff = max(backoff_base * (2**attempt), 0.0)
        try:
            with urllib.request.urlopen(
                urllib.request.Request(url, headers={"User-Agent": user_agent}),
                timeout=timeout,
            ) as resp:
                return resp.read(max_bytes)  # type: ignore[no-any-return]
        except urllib.error.HTTPError as exc:
            # 429 (rate limit) and 5xx (transient server errors) are retryable;
            # other 4xx (client errors, e.g. 404) are permanent and are not.
            retryable = exc.code == 429 or exc.code >= 500
            if not retryable or attempt >= retries:
                raise on_failure(exc) from exc
            server_wait = _retry_after_seconds(exc)
            time.sleep(backoff if server_wait is None else server_wait)
        except OSError as exc:
            # URLError subclasses OSError, so this single clause covers DNS
            # failures, refused connections, timeouts and the like.
            if attempt >= retries:
                raise on_failure(exc) from exc
            time.sleep(backoff)

    # Every iteration returns, raises, or sleeps and retries, and the final
    # iteration cannot retry, so control never reaches this line.
    raise AssertionError("unreachable: retry loop always returns or raises")


def _retry_after_seconds(exc: urllib.error.HTTPError) -> float | None:
    """Return the usable ``Retry-After`` delay carried by *exc*, else None.

    Only a finite, non-negative number of seconds is accepted. Anything else
    (missing header, non-numeric text, negative, NaN, infinity) yields None so
    the caller falls back to its own backoff rather than handing
    ``time.sleep`` a value it rejects.
    """
    # Test the headers object's PRESENCE, not truthiness:
    # http.client.HTTPMessage defines __len__, so a present-but-empty headers
    # object is falsy and a truthiness test would wrongly skip it.
    headers = exc.headers
    if headers is None:
        return None
    raw: Any = headers.get("Retry-After")
    if raw is None:
        return None
    try:
        seconds = float(raw)
    except (TypeError, ValueError):
        return None
    # The chained comparison is False for NaN, negatives and +inf alike.
    if not 0 <= seconds < float("inf"):
        return None
    return seconds