npm - loki-mode - Versions diffs - 7.10.1 → 7.11.0 - Mend

loki-mode 7.10.1 → 7.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/SKILL.md +2 -2
package/VERSION +1 -1
package/autonomy/loki +297 -0
package/autonomy/run.sh +22 -0
package/dashboard/__init__.py +1 -1
package/dashboard/server.py +242 -0
package/dashboard/static/cost.html +274 -0
package/dashboard/static/index.html +94 -0
package/docs/INSTALLATION.md +1 -1
package/docs/R3-COST-OBSERVABILITY-DESIGN.md +147 -0
package/loki-ts/dist/loki.js +144 -144
package/mcp/__init__.py +1 -1
package/package.json +1 -1

package/SKILL.md CHANGED Viewed

@@ -3,7 +3,7 @@ name: loki-mode
 description: Autonomous spec-to-product system. Triggers on "Loki Mode". Takes a spec (PRD, GitHub issue, OpenAPI doc, etc.) to deployed product via the RARV-C closure loop, with minimal human intervention. Provider-agnostic. Requires --dangerously-skip-permissions flag.
 ---
-# Loki Mode v7.10.1
+# Loki Mode v7.11.0
 **You are an autonomous agent. You make decisions. You do not ask questions. You do not stop.**
@@ -381,4 +381,4 @@ See `CHANGELOG.md` entries [7.5.7], [7.5.8], [7.5.13] for the per-fix list and r
 ---
-**v7.10.1 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
+**v7.11.0 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**

package/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 7.10.1
1	+ 7.11.0

package/autonomy/loki CHANGED Viewed

@@ -543,6 +543,7 @@ show_help() {
     echo "  optimize         Optimize prompts based on session history"
     echo "  enterprise       Enterprise feature management (tokens, OIDC)"
     echo "  metrics [opts]   Session productivity report (--json, --last N, --save, --share)"
+    echo "  cost [opts]      Transparent cost view: per-run/project spend + budget (--json, --last N)"
     echo "  dogfood          Show self-development statistics"
     echo "  secrets [cmd]    API key status and validation (status|validate)"
     echo "  reset [target]   Reset session state (all|retries|failed)"
@@ -13034,6 +13035,9 @@ main() {
         metrics)
             cmd_metrics "$@"
             ;;
+        cost)
+            cmd_cost "$@"
+            ;;
         syslog)
             cmd_syslog "$@"
             ;;
@@ -17833,6 +17837,299 @@ cmd_syslog() {
     esac
 }
+# Transparent cost view (R3): per-run + per-project spend, model routing, and
+# budget status with the 80% warn line. Reuses efficiency_cost.collect_efficiency
+# for the current-run aggregate (single source of truth) and reads .loki/proofs/
+# for persistent per-run history. Honest: prints "not recorded" when cost was
+# never collected.
+#
+# Arguments:   --json | --last N | --last=N | --help, -h
+# Environment: LOKI_DIR (default ".loki"), SKILL_DIR, LOKI_BUDGET_LIMIT
+# Exits:       0 after --help; 1 on an unknown option or when python3 is
+#              missing. Otherwise the status is that of the embedded script.
+cmd_cost() {
+    local show_json=false
+    local last_n=0
+    while [[ $# -gt 0 ]]; do
+        case "$1" in
+            --help|-h)
+                echo -e "${BOLD}loki cost${NC} - Transparent cost and budget view"
+                echo ""
+                echo "Usage: loki cost [options]"
+                echo ""
+                echo "Shows the current run's spend (from .loki/metrics/efficiency/),"
+                echo "per-run history (from .loki/proofs/), model routing by spend, and"
+                echo "budget status. Budgets warn at 80% and hard-stop at 100%."
+                echo ""
+                echo "Options:"
+                echo "  --json               Machine-readable JSON output"
+                echo "  --last N             Show only the last N runs in history (default: all)"
+                echo "  --help, -h           Show this help"
+                echo ""
+                echo "Examples:"
+                echo "  loki cost                      # Cost summary + budget status"
+                echo "  loki cost --json               # Machine-readable output"
+                echo "  loki cost --last 10            # Last 10 runs of history"
+                echo ""
+                echo "Budget cap: set LOKI_BUDGET_LIMIT (USD). Warns at 80%, stops at 100%."
+                exit 0
+                ;;
+            --json) show_json=true; shift ;;
+            --last)
+                # A trailing "--last" with no value falls back to 0 (= all).
+                # "shift 2" with a single positional left shifts nothing and
+                # returns non-zero, which would spin this loop forever, so
+                # only shift by what is actually there.
+                last_n="${2:-0}"
+                if [[ $# -ge 2 ]]; then
+                    shift 2
+                else
+                    shift
+                fi
+                ;;
+            --last=*) last_n="${1#*=}"; shift ;;
+            *) echo -e "${RED}Unknown option: $1${NC}"; echo "Run 'loki cost --help' for usage."; exit 1 ;;
+        esac
+    done
+    local loki_dir="${LOKI_DIR:-.loki}"
+    if ! command -v python3 &>/dev/null; then
+        echo -e "${RED}python3 is required for the cost view${NC}"
+        exit 1
+    fi
+    # Settings are handed to Python through the environment; the heredoc
+    # delimiter is quoted so the shell performs no expansion inside the script.
+    LOKI_DIR="$loki_dir" \
+    LOKI_SKILL_DIR="$SKILL_DIR" \
+    COST_JSON="$show_json" \
+    COST_LAST_N="$last_n" \
+    COST_BUDGET_LIMIT="${LOKI_BUDGET_LIMIT:-}" \
+    python3 << 'COST_SCRIPT'
+import json
+import os
+import sys
+loki_dir = os.environ.get("LOKI_DIR", ".loki")
+skill_dir = os.environ.get("LOKI_SKILL_DIR", "")
+show_json = os.environ.get("COST_JSON", "false") == "true"
+try:
+    last_n = int(os.environ.get("COST_LAST_N", "0") or "0")
+except ValueError:
+    # Non-numeric --last value: treat as "show all".
+    last_n = 0
+budget_limit_env = os.environ.get("COST_BUDGET_LIMIT", "").strip()
+# ANSI (suppressed under --json / non-tty)
+use_color = (not show_json) and sys.stdout.isatty()
+BOLD = "\033[1m" if use_color else ""
+DIM = "\033[2m" if use_color else ""
+CYAN = "\033[36m" if use_color else ""
+GREEN = "\033[32m" if use_color else ""
+YELLOW = "\033[33m" if use_color else ""
+RED = "\033[31m" if use_color else ""
+NC = "\033[0m" if use_color else ""
+# Reuse the shared cost lib (single source of truth). Never duplicate the
+# cost-summing logic; collect_efficiency returns usd=None when nothing was
+# recorded, which we surface honestly.
+collect_efficiency = None
+if skill_dir:
+    lib_dir = os.path.join(skill_dir, "autonomy", "lib")
+    if lib_dir not in sys.path:
+        sys.path.insert(0, lib_dir)
+    try:
+        from efficiency_cost import collect_efficiency as _ce
+        collect_efficiency = _ce
+    except Exception:
+        collect_efficiency = None
+def _read_json(path):
+    # Context manager closes the handle even when parsing raises.
+    with open(path) as fh:
+        return json.load(fh)
+def _as_float(v):
+    # float(v), or None when v is missing or not numeric.
+    try:
+        return float(v)
+    except (TypeError, ValueError):
+        return None
+def _fmt_usd(v):
+    # "$" + amount with 2 to 4 decimals; "not recorded" for None/non-numeric.
+    if v is None:
+        return "not recorded"
+    try:
+        n = float(v)
+    except (TypeError, ValueError):
+        return "not recorded"
+    s = ("%.4f" % n).rstrip("0").rstrip(".")
+    if "." not in s:
+        s += ".00"
+    elif len(s.split(".")[1]) == 1:
+        s += "0"
+    return "$" + s
+# --- current run aggregate (reuse collect_efficiency, single source) -----
+# We do NOT re-implement the cost sum here: efficiency_cost.collect_efficiency
+# is the single source of truth (shared with the proof generator and the R2
+# benchmark adapters). On a broken install where the lib is missing, we degrade
+# honestly rather than ship a divergent 5th copy of the cost math.
+current_cost = None
+current_model = ""
+lib_available = collect_efficiency is not None
+if lib_available:
+    try:
+        cost_dict, current_model = collect_efficiency(loki_dir)
+        current_cost = cost_dict.get("usd")
+    except Exception:
+        current_cost = None
+# --- per-run history from .loki/proofs/ ----------------------------------
+runs = []
+# Summed over every readable proof, before any --last truncation below.
+project_total = 0.0
+proofs_dir = os.path.join(loki_dir, "proofs")
+if os.path.isdir(proofs_dir):
+    for name in sorted(os.listdir(proofs_dir)):
+        run_dir = os.path.join(proofs_dir, name)
+        proof_json = os.path.join(run_dir, "proof.json")
+        if not os.path.isfile(proof_json):
+            continue
+        try:
+            d = _read_json(proof_json)
+        except Exception:
+            # Unreadable or malformed proof: skip it, keep the rest.
+            continue
+        if not isinstance(d, dict):
+            continue
+        run_cost = (d.get("cost") or {}).get("usd")
+        run_cost_num = None
+        if run_cost is not None:
+            try:
+                run_cost_num = float(run_cost)
+                project_total += run_cost_num
+            except (TypeError, ValueError):
+                run_cost_num = None
+        runs.append({
+            "run_id": d.get("run_id", name),
+            "generated_at": d.get("generated_at"),
+            "model": (d.get("provider") or {}).get("model"),
+            "cost_usd": run_cost_num,
+            "files_changed": (d.get("files_changed") or {}).get("count"),
+            "final_verdict": (d.get("council") or {}).get("final_verdict"),
+        })
+# Newest first; runs without a timestamp sort last.
+runs.sort(key=lambda x: (x.get("generated_at") or ""), reverse=True)
+if last_n > 0:
+    runs = runs[:last_n]
+# --- budget status (read-time; warn at 80%, exceeded at 100%) ------------
+# The file value is coerced BEFORE the env fallback is considered, so an
+# unparsable limit in budget.json no longer hides a valid LOKI_BUDGET_LIMIT.
+budget_limit = None
+budget_file = os.path.join(loki_dir, "metrics", "budget.json")
+if os.path.isfile(budget_file):
+    try:
+        bd = _read_json(budget_file)
+        budget_limit = _as_float(bd.get("limit") or bd.get("budget_limit"))
+    except Exception:
+        budget_limit = None
+if budget_limit is None and budget_limit_env:
+    budget_limit = _as_float(budget_limit_env)
+budget_used = current_cost if isinstance(current_cost, (int, float)) else 0.0
+status = "none"
+percent_used = None
+remaining = None
+if budget_limit is not None and budget_limit > 0:
+    percent_used = round(budget_used / budget_limit * 100, 2)
+    remaining = max(0.0, budget_limit - budget_used)
+    if budget_used >= budget_limit:
+        status = "exceeded"
+    elif budget_used >= 0.80 * budget_limit:
+        status = "warn"
+    else:
+        status = "ok"
+# --- model routing by spend (from run history) ---------------------------
+# Built from the (possibly --last-truncated) run list shown to the user.
+by_model = {}
+for r in runs:
+    c = r.get("cost_usd")
+    if c is None:
+        continue
+    # str() so a non-string model value in a proof cannot break the table
+    # slicing further down.
+    m = str(r.get("model") or "unknown")
+    by_model[m] = by_model.get(m, 0.0) + c
+if show_json:
+    out = {
+        "current_run": {
+            "cost_usd": current_cost,
+            "model": current_model or None,
+            "cost_recorded": current_cost is not None,
+            "cost_lib_available": lib_available,
+        },
+        "runs": runs,
+        "runs_count": len(runs),
+        "project_total_usd": round(project_total, 6) if runs else 0.0,
+        "by_model": {k: round(v, 6) for k, v in by_model.items()},
+        "budget": {
+            "limit": budget_limit,
+            "used": round(budget_used, 6),
+            "remaining": round(remaining, 6) if remaining is not None else None,
+            "percent_used": percent_used,
+            "status": status,
+            "warn_threshold_percent": 80,
+            "exceeded": status == "exceeded",
+        },
+    }
+    print(json.dumps(out, indent=2))
+    sys.exit(0)
+# --- human-readable ------------------------------------------------------
+print()
+print(BOLD + "Loki Cost" + NC)
+print(DIM + "=" * 50 + NC)
+print()
+print(CYAN + "Current run" + NC)
+if not lib_available:
+    print(DIM + "  Cost library unavailable (efficiency_cost.py not found)." + NC)
+    print(DIM + "  Current-run spend cannot be computed on this install." + NC)
+elif current_cost is None:
+    print("  Cost not recorded for this run.")
+else:
+    mtxt = (" (" + current_model + ")") if current_model else ""
+    print("  Spend: " + BOLD + _fmt_usd(current_cost) + NC + mtxt)
+print()
+print(CYAN + "Project history" + NC)
+print("  Runs recorded: " + str(len(runs)))
+print("  Total spend:   " + BOLD + (_fmt_usd(project_total) if runs else "$0.00") + NC)
+if by_model:
+    print()
+    print(CYAN + "Model routing (by spend)" + NC)
+    # "or 1.0" guards the percentage division when every cost is zero.
+    total_m = sum(by_model.values()) or 1.0
+    for m in sorted(by_model, key=lambda k: by_model[k], reverse=True):
+        v = by_model[m]
+        pct = v / total_m * 100
+        # 20-cell bar, one cell per 5%.
+        bar_len = int(pct / 5)
+        bar = "#" * bar_len + "." * (20 - bar_len)
+        print("  {:<16} {:>9} ({:4.1f}%) [{}]".format(m[:16], _fmt_usd(v), pct, bar))
+print()
+print(CYAN + "Budget" + NC)
+if budget_limit is None:
+    print("  No cap set. Set LOKI_BUDGET_LIMIT (USD) to cap spend.")
+    print(DIM + "  When set, Loki warns at 80% and hard-stops at 100%." + NC)
+else:
+    col = GREEN
+    if status == "warn":
+        col = YELLOW
+    elif status == "exceeded":
+        col = RED
+    print("  Cap:       " + _fmt_usd(budget_limit))
+    print("  Used:      " + _fmt_usd(budget_used) + " (" + col + str(percent_used) + "%" + NC + ")")
+    print("  Remaining: " + _fmt_usd(remaining))
+    print("  Status:    " + col + BOLD + status.upper() + NC)
+    if status == "warn":
+        print(YELLOW + "  Warning: at or above 80% of cap. Run continues; hard-stop at 100%." + NC)
+    elif status == "exceeded":
+        print(RED + "  Cap reached. The run is paused to prevent a surprise bill." + NC)
+if runs:
+    print()
+    print(CYAN + "Recent runs" + NC)
+    print(DIM + "  {:<28} {:<10} {:>9}  {}".format("Run", "Model", "Cost", "Verdict") + NC)
+    # Default table depth is 10 rows; an explicit --last above 10 widens it.
+    for r in runs[:max(last_n, 10) if last_n else 10]:
+        rid = str(r.get("run_id") or "")[:28]
+        mdl = str(r.get("model") or "")[:10]
+        cst = _fmt_usd(r.get("cost_usd"))
+        vrd = str(r.get("final_verdict") or "")
+        print("  {:<28} {:<10} {:>9}  {}".format(rid, mdl, cst, vrd))
+print()
+print(DIM + "Dashboard cost panel: /cost   |   JSON: loki cost --json" + NC)
+print()
+COST_SCRIPT
+}
 # Fetch and display Prometheus metrics from dashboard
 cmd_metrics() {
     local show_json=false

package/autonomy/run.sh CHANGED Viewed

@@ -8416,6 +8416,28 @@ BUDGETUPD_EOF
 BUDGETUPD_EOF
     fi
+    # Anti-surprise-cost warn (R3): when spend crosses 80% of the cap but is
+    # still under 100%, log a warning and emit an event. Does NOT pause: the
+    # warn is the transparency the user wants BEFORE the hard cap stops them.
+    # Read-time classification only; budget.json schema is unchanged.
+    local warn
+    warn=$(python3 -c "
+import sys
+try:
+    cost = float(sys.argv[1]); limit = float(sys.argv[2])
+    print(1 if (limit > 0 and 0.80 * limit <= cost < limit) else 0)
+except (ValueError, IndexError):
+    print(0)
+" "$current_cost" "$BUDGET_LIMIT" 2>/dev/null || echo "0")
+    if [[ "$warn" == "1" ]]; then
+        log_warn "BUDGET WARNING: \$${current_cost} is at or above 80% of cap \$${BUDGET_LIMIT}. Run continues; hard-stop at 100%."
+        emit_event_json "budget_warning" \
+            "limit=${BUDGET_LIMIT}" \
+            "current=${current_cost}" \
+            "threshold_percent=80" \
+            "iteration=${ITERATION_COUNT:-0}"
+    fi
     return 1
 }

package/dashboard/__init__.py CHANGED Viewed

@@ -7,7 +7,7 @@ Modules:
     control: Session control API (start/stop/pause/resume)
 """
-__version__ = "7.10.1"
+__version__ = "7.11.0"
 # Expose the control app for easy import
 try:

package/dashboard/server.py CHANGED Viewed

@@ -459,6 +459,7 @@ async def _push_loki_state_loop() -> None:
     """
     last_mtime: float = 0.0
     _last_skill_hash: str = ""  # Track skill-session state changes
+    _last_budget_status: str = ""  # Track budget-status transitions (R3)
     while True:
         try:
             if not manager.active_connections:
@@ -469,6 +470,26 @@ async def _push_loki_state_loop() -> None:
             state_file = loki_dir / "dashboard-state.json"
             _session_file = loki_dir / "session.json"
+            # R3 anti-surprise-cost: proactively push a budget_status message
+            # when spend crosses a threshold (ok -> warn -> exceeded), so a user
+            # who is not watching the terminal sees the 80% warning in any open
+            # dashboard page BEFORE the hard stop at 100%. Reuses the existing
+            # WebSocket broadcast path (manager.broadcast); no second channel.
+            # Sent on transition (independent of the dashboard-state.json mtime
+            # gate) because budget can cross 80% while that file is unchanged.
+            try:
+                _budget = _compute_budget_snapshot(loki_dir)
+                _bstatus = _budget.get("status", "none")
+                if _bstatus in ("warn", "exceeded") and _bstatus != _last_budget_status:
+                    await manager.broadcast({
+                        "type": "budget_status",
+                        "data": _budget,
+                    })
+                # Track every status so a return to ok/none re-arms the warn push.
+                _last_budget_status = _bstatus
+            except (OSError, ValueError, KeyError):
+                pass
             _broadcast_sent = False
             if state_file.exists():
@@ -4551,6 +4572,214 @@ async def get_budget():
     }
+# Budget warn threshold: surface a "warn" status before the hard cap so users
+# are not surprised by a bill. Matches the runtime warn in run.sh
+# check_budget_limit() and budget.ts (warn at 80%, hard-stop at 100%).
+_BUDGET_WARN_FRACTION = 0.80
+def _budget_status(used: float, limit: Optional[float]) -> str:
+    """Classify budget usage. Read-time only; no state mutation.
+    Returns one of: "none" (no limit set), "ok" (<80%), "warn" (>=80% and
+    <100%), "exceeded" (>=100%). The warn band is the anti-surprise wedge:
+    the user sees it BEFORE the hard cap pauses the run.
+    """
+    if limit is None or limit <= 0:
+        return "none"
+    if used >= limit:
+        return "exceeded"
+    if used >= _BUDGET_WARN_FRACTION * limit:
+        return "warn"
+    return "ok"
+def _compute_budget_snapshot(loki_dir: _Path) -> dict:
+    """Read-time budget snapshot shared by /api/cost/timeline and the WS push.
+    Single source of truth so the proactive WebSocket broadcast and the pull
+    endpoint never disagree. "used" is the current run's spend (sum of the live
+    .loki/metrics/efficiency/iteration-*.json records, mirroring
+    check_budget_limit in run.sh). The cap comes from budget.json, falling back
+    to the LOKI_BUDGET_LIMIT env var. No state is mutated.
+    """
+    efficiency_dir = loki_dir / "metrics" / "efficiency"
+    budget_file = loki_dir / "metrics" / "budget.json"
+    current_total = 0.0
+    if efficiency_dir.exists():
+        for eff_file in sorted(efficiency_dir.glob("iteration-*.json")):
+            data = _safe_json_read(eff_file, default=None)
+            if not isinstance(data, dict):
+                continue
+            inp = data.get("input_tokens", 0) or 0
+            out = data.get("output_tokens", 0) or 0
+            model = str(data.get("model", "sonnet")).lower()
+            cost = data.get("cost_usd")
+            if cost is None:
+                cost = _calculate_model_cost(model, inp, out)
+            else:
+                try:
+                    cost = float(cost)
+                except (TypeError, ValueError):
+                    cost = 0.0
+            current_total += cost
+    budget_limit = None
+    if budget_file.exists():
+        bdata = _safe_json_read(budget_file, default=None)
+        if isinstance(bdata, dict):
+            budget_limit = bdata.get("limit") or bdata.get("budget_limit")
+    if budget_limit is None:
+        env_limit = os.environ.get("LOKI_BUDGET_LIMIT", "")
+        if env_limit:
+            try:
+                budget_limit = float(env_limit)
+            except ValueError:
+                budget_limit = None
+    if budget_limit is not None:
+        try:
+            budget_limit = float(budget_limit)
+        except (TypeError, ValueError):
+            budget_limit = None
+    used = round(current_total, 6)
+    if budget_limit is not None and budget_limit > 0:
+        remaining = max(0.0, budget_limit - used)
+        percent_used = round((used / budget_limit) * 100, 2)
+    else:
+        remaining = None
+        percent_used = None
+    status = _budget_status(used, budget_limit)
+    return {
+        "limit": budget_limit,
+        "used": used,
+        "remaining": round(remaining, 6) if remaining is not None else None,
+        "percent_used": percent_used,
+        "status": status,
+        "warn_threshold_percent": int(_BUDGET_WARN_FRACTION * 100),
+        "exceeded": status == "exceeded",
+    }
+@app.get("/api/cost/timeline")
+async def get_cost_timeline():
+    """Cost over time: intra-run per-iteration series + per-run history.
+    Two honest series, distinct sources (see docs/R3-COST-OBSERVABILITY-DESIGN.md):
+      - current_run: from .loki/metrics/efficiency/iteration-*.json. This dir is
+        wiped at the start of every run (run.sh), so it only ever holds the
+        CURRENT run's iterations. Used for the intra-run cumulative line.
+      - runs: from .loki/proofs/<run_id>/proof.json (persistent, one per run).
+        This is the real per-run/per-project "cost over time" history.
+    Budget status is computed at read time (no budget.json schema change) and
+    classifies into ok/warn/exceeded so the UI can warn at 80% before the cap.
+    Cost is never fabricated: when nothing was recorded, cost_recorded is False
+    and totals are honestly null rather than a misleading $0.00.
+    """
+    loki_dir = _get_loki_dir()
+    efficiency_dir = loki_dir / "metrics" / "efficiency"
+    # --- current run: per-iteration series from efficiency/ -----------------
+    iterations: list = []
+    current_total = 0.0
+    cost_recorded = False
+    if efficiency_dir.exists():
+        records = []
+        for eff_file in sorted(efficiency_dir.glob("iteration-*.json")):
+            data = _safe_json_read(eff_file, default=None)
+            if not isinstance(data, dict):
+                continue
+            records.append(data)
+        # Sort by numeric iteration when present, else by filename order.
+        def _iter_key(d):
+            try:
+                return int(d.get("iteration", 0))
+            except (TypeError, ValueError):
+                return 0
+        records.sort(key=_iter_key)
+        cumulative = 0.0
+        for data in records:
+            cost_recorded = True
+            inp = data.get("input_tokens", 0) or 0
+            out = data.get("output_tokens", 0) or 0
+            model = str(data.get("model", "sonnet")).lower()
+            cost = data.get("cost_usd")
+            if cost is None:
+                cost = _calculate_model_cost(model, inp, out)
+            else:
+                try:
+                    cost = float(cost)
+                except (TypeError, ValueError):
+                    cost = 0.0
+            cumulative += cost
+            iterations.append({
+                "iteration": data.get("iteration"),
+                "timestamp": data.get("timestamp"),
+                "model": model,
+                "phase": data.get("phase", "unknown"),
+                "provider": data.get("provider"),
+                "input_tokens": inp,
+                "output_tokens": out,
+                "cost_usd": round(cost, 6),
+                "cumulative_usd": round(cumulative, 6),
+            })
+        current_total = cumulative
+    # --- per-run history: from .loki/proofs/*/proof.json --------------------
+    runs: list = []
+    project_total = 0.0
+    proofs_dir = _proofs_dir()
+    try:
+        entries = sorted(proofs_dir.iterdir())
+    except (OSError, FileNotFoundError):
+        entries = []
+    for entry in entries:
+        if not entry.is_dir():
+            continue
+        data = _safe_json_read(entry / "proof.json", default=None)
+        if not isinstance(data, dict):
+            continue
+        run_cost = (data.get("cost") or {}).get("usd")
+        run_cost_num = None
+        if run_cost is not None:
+            try:
+                run_cost_num = float(run_cost)
+                project_total += run_cost_num
+            except (TypeError, ValueError):
+                run_cost_num = None
+        runs.append({
+            "run_id": data.get("run_id", entry.name),
+            "generated_at": data.get("generated_at"),
+            "model": (data.get("provider") or {}).get("model"),
+            "cost_usd": round(run_cost_num, 6) if run_cost_num is not None else None,
+            "files_changed": (data.get("files_changed") or {}).get("count"),
+            "final_verdict": (data.get("council") or {}).get("final_verdict"),
+        })
+    runs.sort(key=lambda x: (x.get("generated_at") or ""), reverse=True)
+    # --- budget block (read-time status; no mutation) -----------------------
+    # Shared snapshot so the pull endpoint and the proactive WS push agree.
+    # Budget "used" is the current run's spend (mirrors check_budget_limit,
+    # which sums the live efficiency dir against the cap). The per-project
+    # history total is reported separately as project_total_usd.
+    budget = _compute_budget_snapshot(loki_dir)
+    return {
+        "current_run": {
+            "iterations": iterations,
+            "total_usd": round(current_total, 6) if cost_recorded else None,
+            "cost_recorded": cost_recorded,
+        },
+        "runs": runs,
+        "runs_count": len(runs),
+        "project_total_usd": round(project_total, 6) if runs else 0.0,
+        "budget": budget,
+    }
 # =============================================================================
 # Pricing API
 # =============================================================================
@@ -6428,6 +6657,19 @@ async def serve_favicon():
     return Response(status_code=404)
+# Serve the self-contained cost + observability panel (R3). Zero-build
+# standalone page that fetches /api/cost/timeline. Mirrors the proofs.html
+# pattern: works without the SPA build.
+@app.get("/cost", include_in_schema=False)
+async def serve_cost_panel():
+    """Serve the standalone cost + observability HTML panel."""
+    if STATIC_DIR:
+        cost_path = os.path.join(STATIC_DIR, "cost.html")
+        if os.path.isfile(cost_path):
+            return FileResponse(cost_path, media_type="text/html")
+    return Response(status_code=404)
 # Serve index.html or standalone HTML for root
 @app.get("/", include_in_schema=False)
 async def serve_index():