loki-mode 7.10.1 → 7.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/SKILL.md CHANGED
@@ -3,7 +3,7 @@ name: loki-mode
3
3
  description: Autonomous spec-to-product system. Triggers on "Loki Mode". Takes a spec (PRD, GitHub issue, OpenAPI doc, etc.) to deployed product via the RARV-C closure loop, with minimal human intervention. Provider-agnostic. Requires --dangerously-skip-permissions flag.
4
4
  ---
5
5
 
6
- # Loki Mode v7.10.1
6
+ # Loki Mode v7.11.0
7
7
 
8
8
  **You are an autonomous agent. You make decisions. You do not ask questions. You do not stop.**
9
9
 
@@ -381,4 +381,4 @@ See `CHANGELOG.md` entries [7.5.7], [7.5.8], [7.5.13] for the per-fix list and r
381
381
 
382
382
  ---
383
383
 
384
- **v7.10.1 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
384
+ **v7.11.0 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
package/VERSION CHANGED
@@ -1 +1 @@
1
- 7.10.1
1
+ 7.11.0
package/autonomy/loki CHANGED
@@ -543,6 +543,7 @@ show_help() {
543
543
  echo " optimize Optimize prompts based on session history"
544
544
  echo " enterprise Enterprise feature management (tokens, OIDC)"
545
545
  echo " metrics [opts] Session productivity report (--json, --last N, --save, --share)"
546
+ echo " cost [opts] Transparent cost view: per-run/project spend + budget (--json, --last N)"
546
547
  echo " dogfood Show self-development statistics"
547
548
  echo " secrets [cmd] API key status and validation (status|validate)"
548
549
  echo " reset [target] Reset session state (all|retries|failed)"
@@ -13034,6 +13035,9 @@ main() {
13034
13035
  metrics)
13035
13036
  cmd_metrics "$@"
13036
13037
  ;;
13038
+ cost)
13039
+ cmd_cost "$@"
13040
+ ;;
13037
13041
  syslog)
13038
13042
  cmd_syslog "$@"
13039
13043
  ;;
@@ -17833,6 +17837,299 @@ cmd_syslog() {
17833
17837
  esac
17834
17838
  }
17835
17839
 
17840
+ # Transparent cost view (R3): per-run + per-project spend, model routing, and
17841
+ # budget status with the 80% warn line. Reuses efficiency_cost.collect_efficiency
17842
+ # for the current-run aggregate (single source of truth) and reads .loki/proofs/
17843
+ # for persistent per-run history. Honest: prints "not recorded" when cost was
17844
+ # never collected, never a fabricated $0.00.
17845
cmd_cost() {
    # Transparent cost view: prints (or emits as JSON) the current run's spend,
    # the per-run history found under $LOKI_DIR/proofs/, spend grouped by model,
    # and the budget status (warn at 80%, exceeded at 100%).
    #
    # Options:
    #   --json       machine-readable JSON output
    #   --last N     restrict history to the N most recent runs (0 = all)
    #   --help, -h   usage text
    #
    # Exits 1 on an unknown option or when python3 is not installed.
    local show_json=false
    local last_n=0

    while [[ $# -gt 0 ]]; do
        case "$1" in
            --help|-h)
                echo -e "${BOLD}loki cost${NC} - Transparent cost and budget view"
                echo ""
                echo "Usage: loki cost [options]"
                echo ""
                echo "Shows the current run's spend (from .loki/metrics/efficiency/),"
                echo "per-run history (from .loki/proofs/), model routing by spend, and"
                echo "budget status. Budgets warn at 80% and hard-stop at 100%."
                echo ""
                echo "Options:"
                echo " --json Machine-readable JSON output"
                echo " --last N Show only the last N runs in history (default: all)"
                echo " --help, -h Show this help"
                echo ""
                echo "Examples:"
                echo " loki cost # Cost summary + budget status"
                echo " loki cost --json # Machine-readable output"
                echo " loki cost --last 10 # Last 10 runs of history"
                echo ""
                echo "Budget cap: set LOKI_BUDGET_LIMIT (USD). Warns at 80%, stops at 100%."
                exit 0
                ;;
            --json) show_json=true; shift ;;
            --last)
                last_n="${2:-0}"
                # `shift 2` with a single remaining argument fails WITHOUT
                # shifting anything, which would leave "--last" in place and
                # spin this loop forever. Only consume the value if it exists.
                if [[ $# -ge 2 ]]; then
                    shift 2
                else
                    shift
                fi
                ;;
            --last=*) last_n="${1#*=}"; shift ;;
            *) echo -e "${RED}Unknown option: $1${NC}"; echo "Run 'loki cost --help' for usage."; exit 1 ;;
        esac
    done

    local loki_dir="${LOKI_DIR:-.loki}"

    if ! command -v python3 &>/dev/null; then
        echo -e "${RED}python3 is required for the cost view${NC}"
        exit 1
    fi

    # Inputs are handed to the embedded script through the environment; the
    # heredoc delimiter is quoted so the shell expands nothing inside it.
    LOKI_DIR="$loki_dir" \
    LOKI_SKILL_DIR="$SKILL_DIR" \
    COST_JSON="$show_json" \
    COST_LAST_N="$last_n" \
    COST_BUDGET_LIMIT="${LOKI_BUDGET_LIMIT:-}" \
    python3 << 'COST_SCRIPT'
import json
import math
import os
import sys

loki_dir = os.environ.get("LOKI_DIR", ".loki")
skill_dir = os.environ.get("LOKI_SKILL_DIR", "")
show_json = os.environ.get("COST_JSON", "false") == "true"
try:
    last_n = int(os.environ.get("COST_LAST_N", "0") or "0")
except ValueError:
    # A non-numeric --last value means "no limit".
    last_n = 0
budget_limit_env = os.environ.get("COST_BUDGET_LIMIT", "").strip()

# ANSI (suppressed under --json / non-tty)
use_color = (not show_json) and sys.stdout.isatty()
BOLD = "\033[1m" if use_color else ""
DIM = "\033[2m" if use_color else ""
CYAN = "\033[36m" if use_color else ""
GREEN = "\033[32m" if use_color else ""
YELLOW = "\033[33m" if use_color else ""
RED = "\033[31m" if use_color else ""
NC = "\033[0m" if use_color else ""

# Reuse the shared cost lib (single source of truth). Never duplicate the
# cost-summing logic; collect_efficiency returns usd=None when nothing was
# recorded, which we surface honestly.
collect_efficiency = None
if skill_dir:
    lib_dir = os.path.join(skill_dir, "autonomy", "lib")
    if lib_dir not in sys.path:
        sys.path.insert(0, lib_dir)
    try:
        from efficiency_cost import collect_efficiency as _ce
        collect_efficiency = _ce
    except Exception:
        collect_efficiency = None


def _read_json(path):
    """Return the parsed JSON at path, or None on any read/parse failure."""
    try:
        # Context manager so the handle is closed even when parsing fails.
        with open(path) as fh:
            return json.load(fh)
    except Exception:
        return None


def _section(doc, key):
    """Return doc[key] when it is a dict, else {} (tolerates malformed proofs)."""
    value = doc.get(key)
    return value if isinstance(value, dict) else {}


def _finite_float(v):
    """Convert v to a finite float, or return None when that is impossible."""
    try:
        n = float(v)
    except (TypeError, ValueError, OverflowError):
        return None
    # NaN/inf are not amounts of money and are not valid JSON numbers.
    return n if math.isfinite(n) else None


def _fmt_usd(v):
    """Format a dollar amount: 2 to 4 decimals, or "not recorded"."""
    if v is None:
        return "not recorded"
    n = _finite_float(v)
    if n is None:
        return "not recorded"
    s = ("%.4f" % n).rstrip("0").rstrip(".")
    # Pad back to at least two decimals after trimming trailing zeros.
    if "." not in s:
        s += ".00"
    elif len(s.split(".")[1]) == 1:
        s += "0"
    return "$" + s


# --- current run aggregate (reuse collect_efficiency, single source) -----
# We do NOT re-implement the cost sum here: efficiency_cost.collect_efficiency
# is the single source of truth. When the lib cannot be imported we degrade
# honestly rather than ship a divergent copy of the cost math.
current_cost = None
current_model = ""
lib_available = collect_efficiency is not None
if lib_available:
    try:
        cost_dict, current_model = collect_efficiency(loki_dir)
        current_cost = cost_dict.get("usd")
    except Exception:
        current_cost = None

# --- per-run history from .loki/proofs/ ----------------------------------
runs = []
project_total = 0.0
proofs_dir = os.path.join(loki_dir, "proofs")
if os.path.isdir(proofs_dir):
    for name in sorted(os.listdir(proofs_dir)):
        proof_json = os.path.join(proofs_dir, name, "proof.json")
        if not os.path.isfile(proof_json):
            continue
        d = _read_json(proof_json)
        if not isinstance(d, dict):
            continue
        # None means "cost not recorded" for this run; it is never summed.
        run_cost_num = _finite_float(_section(d, "cost").get("usd"))
        if run_cost_num is not None:
            project_total += run_cost_num
        runs.append({
            "run_id": d.get("run_id", name),
            "generated_at": d.get("generated_at"),
            "model": _section(d, "provider").get("model"),
            "cost_usd": run_cost_num,
            "files_changed": _section(d, "files_changed").get("count"),
            "final_verdict": _section(d, "council").get("final_verdict"),
        })
# Newest first; str() keeps the sort total even if a proof stored a
# non-string timestamp.
runs.sort(key=lambda x: str(x.get("generated_at") or ""), reverse=True)
if last_n > 0:
    runs = runs[:last_n]

# --- budget status (read-time; warn at 80%, exceeded at 100%) ------------
# budget.json wins; the LOKI_BUDGET_LIMIT env value is the fallback.
budget_limit = None
budget_file = os.path.join(loki_dir, "metrics", "budget.json")
if os.path.isfile(budget_file):
    bd = _read_json(budget_file)
    if isinstance(bd, dict):
        budget_limit = bd.get("limit") or bd.get("budget_limit")
if budget_limit is None and budget_limit_env:
    budget_limit = budget_limit_env
if budget_limit is not None:
    # Rejects unparsable values and NaN/inf, all of which mean "no usable cap".
    budget_limit = _finite_float(budget_limit)

budget_used = current_cost if isinstance(current_cost, (int, float)) else 0.0
status = "none"
percent_used = None
remaining = None
if budget_limit is not None and budget_limit > 0:
    percent_used = round(budget_used / budget_limit * 100, 2)
    remaining = max(0.0, budget_limit - budget_used)
    if budget_used >= budget_limit:
        status = "exceeded"
    elif budget_used >= 0.80 * budget_limit:
        status = "warn"
    else:
        status = "ok"

# --- model routing by spend (from run history) ---------------------------
by_model = {}
for r in runs:
    c = r.get("cost_usd")
    if c is None:
        continue
    # str() so a non-string model value can still be a key and be sliced below.
    m = str(r.get("model") or "unknown")
    by_model[m] = by_model.get(m, 0.0) + c

if show_json:
    out = {
        "current_run": {
            "cost_usd": current_cost,
            "model": current_model or None,
            "cost_recorded": current_cost is not None,
            "cost_lib_available": lib_available,
        },
        "runs": runs,
        "runs_count": len(runs),
        "project_total_usd": round(project_total, 6) if runs else 0.0,
        "by_model": {k: round(v, 6) for k, v in by_model.items()},
        "budget": {
            "limit": budget_limit,
            "used": round(budget_used, 6),
            "remaining": round(remaining, 6) if remaining is not None else None,
            "percent_used": percent_used,
            "status": status,
            "warn_threshold_percent": 80,
            "exceeded": status == "exceeded",
        },
    }
    print(json.dumps(out, indent=2))
    sys.exit(0)

# --- human-readable ------------------------------------------------------
print()
print(BOLD + "Loki Cost" + NC)
print(DIM + "=" * 50 + NC)

print()
print(CYAN + "Current run" + NC)
if not lib_available:
    print(DIM + " Cost library unavailable (efficiency_cost.py not found)." + NC)
    print(DIM + " Current-run spend cannot be computed on this install." + NC)
elif current_cost is None:
    print(" Cost not recorded for this run.")
else:
    mtxt = (" (" + current_model + ")") if current_model else ""
    print(" Spend: " + BOLD + _fmt_usd(current_cost) + NC + mtxt)

print()
print(CYAN + "Project history" + NC)
print(" Runs recorded: " + str(len(runs)))
print(" Total spend: " + BOLD + (_fmt_usd(project_total) if runs else "$0.00") + NC)

if by_model:
    print()
    print(CYAN + "Model routing (by spend)" + NC)
    # "or 1.0" avoids dividing by zero when every recorded cost is 0.
    total_m = sum(by_model.values()) or 1.0
    for m in sorted(by_model, key=lambda k: by_model[k], reverse=True):
        v = by_model[m]
        pct = v / total_m * 100
        bar_len = int(pct / 5)  # 20-cell bar, 5% per cell
        bar = "#" * bar_len + "." * (20 - bar_len)
        print(" {:<16} {}{:>9} ({:4.1f}%) [{}]".format(m[:16], "", _fmt_usd(v), pct, bar))

print()
print(CYAN + "Budget" + NC)
if budget_limit is None:
    print(" No cap set. Set LOKI_BUDGET_LIMIT (USD) to cap spend.")
    print(DIM + " When set, Loki warns at 80% and hard-stops at 100%." + NC)
else:
    col = GREEN
    if status == "warn":
        col = YELLOW
    elif status == "exceeded":
        col = RED
    print(" Cap: " + _fmt_usd(budget_limit))
    print(" Used: " + _fmt_usd(budget_used) + " (" + col + str(percent_used) + "%" + NC + ")")
    print(" Remaining: " + _fmt_usd(remaining))
    print(" Status: " + col + BOLD + status.upper() + NC)
    if status == "warn":
        print(YELLOW + " Warning: at or above 80% of cap. Run continues; hard-stop at 100%." + NC)
    elif status == "exceeded":
        print(RED + " Cap reached. The run is paused to prevent a surprise bill." + NC)

if runs:
    print()
    print(CYAN + "Recent runs" + NC)
    print(DIM + " {:<28} {:<10} {:>9} {}".format("Run", "Model", "Cost", "Verdict") + NC)
    # Show at least 10 rows; runs was already trimmed to --last N above.
    for r in runs[:max(last_n, 10)]:
        rid = str(r.get("run_id") or "")[:28]
        mdl = str(r.get("model") or "")[:10]
        cst = _fmt_usd(r.get("cost_usd"))
        vrd = str(r.get("final_verdict") or "")
        print(" {:<28} {:<10} {:>9} {}".format(rid, mdl, cst, vrd))

print()
print(DIM + "Dashboard cost panel: /cost | JSON: loki cost --json" + NC)
print()
COST_SCRIPT
}
18132
+
17836
18133
  # Fetch and display Prometheus metrics from dashboard
17837
18134
  cmd_metrics() {
17838
18135
  local show_json=false
package/autonomy/run.sh CHANGED
@@ -8416,6 +8416,28 @@ BUDGETUPD_EOF
8416
8416
  BUDGETUPD_EOF
8417
8417
  fi
8418
8418
 
8419
+ # Anti-surprise-cost warn (R3): when spend crosses 80% of the cap but is
8420
+ # still under 100%, log a warning and emit an event. Does NOT pause: the
8421
+ # warn is the transparency the user wants BEFORE the hard cap stops them.
8422
+ # Read-time classification only; budget.json schema is unchanged.
8423
+ local warn
8424
+ warn=$(python3 -c "
8425
+ import sys
8426
+ try:
8427
+ cost = float(sys.argv[1]); limit = float(sys.argv[2])
8428
+ print(1 if (limit > 0 and 0.80 * limit <= cost < limit) else 0)
8429
+ except (ValueError, IndexError):
8430
+ print(0)
8431
+ " "$current_cost" "$BUDGET_LIMIT" 2>/dev/null || echo "0")
8432
+ if [[ "$warn" == "1" ]]; then
8433
+ log_warn "BUDGET WARNING: \$${current_cost} is at or above 80% of cap \$${BUDGET_LIMIT}. Run continues; hard-stop at 100%."
8434
+ emit_event_json "budget_warning" \
8435
+ "limit=${BUDGET_LIMIT}" \
8436
+ "current=${current_cost}" \
8437
+ "threshold_percent=80" \
8438
+ "iteration=${ITERATION_COUNT:-0}"
8439
+ fi
8440
+
8419
8441
  return 1
8420
8442
  }
8421
8443
 
@@ -7,7 +7,7 @@ Modules:
7
7
  control: Session control API (start/stop/pause/resume)
8
8
  """
9
9
 
10
- __version__ = "7.10.1"
10
+ __version__ = "7.11.0"
11
11
 
12
12
  # Expose the control app for easy import
13
13
  try:
@@ -459,6 +459,7 @@ async def _push_loki_state_loop() -> None:
459
459
  """
460
460
  last_mtime: float = 0.0
461
461
  _last_skill_hash: str = "" # Track skill-session state changes
462
+ _last_budget_status: str = "" # Track budget-status transitions (R3)
462
463
  while True:
463
464
  try:
464
465
  if not manager.active_connections:
@@ -469,6 +470,26 @@ async def _push_loki_state_loop() -> None:
469
470
  state_file = loki_dir / "dashboard-state.json"
470
471
  _session_file = loki_dir / "session.json"
471
472
 
473
+ # R3 anti-surprise-cost: proactively push a budget_status message
474
+ # when spend crosses a threshold (ok -> warn -> exceeded), so a user
475
+ # who is not watching the terminal sees the 80% warning in any open
476
+ # dashboard page BEFORE the hard stop at 100%. Reuses the existing
477
+ # WebSocket broadcast path (manager.broadcast); no second channel.
478
+ # Sent on transition (independent of the dashboard-state.json mtime
479
+ # gate) because budget can cross 80% while that file is unchanged.
480
+ try:
481
+ _budget = _compute_budget_snapshot(loki_dir)
482
+ _bstatus = _budget.get("status", "none")
483
+ if _bstatus in ("warn", "exceeded") and _bstatus != _last_budget_status:
484
+ await manager.broadcast({
485
+ "type": "budget_status",
486
+ "data": _budget,
487
+ })
488
+ # Track every status so a return to ok/none re-arms the warn push.
489
+ _last_budget_status = _bstatus
490
+ except (OSError, ValueError, KeyError):
491
+ pass
492
+
472
493
  _broadcast_sent = False
473
494
 
474
495
  if state_file.exists():
@@ -4551,6 +4572,214 @@ async def get_budget():
4551
4572
  }
4552
4573
 
4553
4574
 
4575
+ # Budget warn threshold: surface a "warn" status before the hard cap so users
4576
+ # are not surprised by a bill. Matches the runtime warn in run.sh
4577
+ # check_budget_limit() and budget.ts (warn at 80%, hard-stop at 100%).
4578
+ _BUDGET_WARN_FRACTION = 0.80
4579
+
4580
+
4581
+ def _budget_status(used: float, limit: Optional[float]) -> str:
4582
+ """Classify budget usage. Read-time only; no state mutation.
4583
+
4584
+ Returns one of: "none" (no limit set), "ok" (<80%), "warn" (>=80% and
4585
+ <100%), "exceeded" (>=100%). The warn band is the anti-surprise wedge:
4586
+ the user sees it BEFORE the hard cap pauses the run.
4587
+ """
4588
+ if limit is None or limit <= 0:
4589
+ return "none"
4590
+ if used >= limit:
4591
+ return "exceeded"
4592
+ if used >= _BUDGET_WARN_FRACTION * limit:
4593
+ return "warn"
4594
+ return "ok"
4595
+
4596
+
4597
def _compute_budget_snapshot(loki_dir: _Path) -> dict:
    """Build a read-time budget snapshot for the given .loki directory.

    "used" is the sum over ``metrics/efficiency/iteration-*.json``: each
    record contributes its ``cost_usd`` when present, otherwise a cost
    derived from its token counts via ``_calculate_model_cost``. The cap is
    read from ``metrics/budget.json`` (``limit`` or ``budget_limit``) and
    falls back to the ``LOKI_BUDGET_LIMIT`` environment variable. Nothing is
    written.

    Args:
        loki_dir: Path of the .loki state directory.

    Returns:
        A dict with keys ``limit``, ``used``, ``remaining``,
        ``percent_used``, ``status``, ``warn_threshold_percent`` and
        ``exceeded``. ``remaining`` and ``percent_used`` are ``None`` when
        there is no positive, finite cap.
    """
    import math  # function-scope so the block does not rely on a file-level import

    efficiency_dir = loki_dir / "metrics" / "efficiency"
    budget_file = loki_dir / "metrics" / "budget.json"

    current_total = 0.0
    if efficiency_dir.exists():
        for eff_file in sorted(efficiency_dir.glob("iteration-*.json")):
            data = _safe_json_read(eff_file, default=None)
            if not isinstance(data, dict):
                continue
            inp = data.get("input_tokens", 0) or 0
            out = data.get("output_tokens", 0) or 0
            model = str(data.get("model", "sonnet")).lower()
            cost = data.get("cost_usd")
            if cost is None:
                cost = _calculate_model_cost(model, inp, out)
            else:
                try:
                    cost = float(cost)
                except (TypeError, ValueError, OverflowError):
                    cost = 0.0
                # A NaN/inf record would poison the running total and make
                # the snapshot unserialisable as strict JSON.
                if not math.isfinite(cost):
                    cost = 0.0
            current_total += cost

    budget_limit = None
    if budget_file.exists():
        bdata = _safe_json_read(budget_file, default=None)
        if isinstance(bdata, dict):
            budget_limit = bdata.get("limit") or bdata.get("budget_limit")
    if budget_limit is None:
        # An empty env value means "not set".
        budget_limit = os.environ.get("LOKI_BUDGET_LIMIT", "") or None
    if budget_limit is not None:
        try:
            budget_limit = float(budget_limit)
        except (TypeError, ValueError, OverflowError):
            budget_limit = None
    # float() accepts "nan" and "inf"; neither is a usable cap and both would
    # leak into limit/remaining/percent_used as non-JSON numbers.
    if budget_limit is not None and not math.isfinite(budget_limit):
        budget_limit = None

    used = round(current_total, 6)
    if budget_limit is not None and budget_limit > 0:
        remaining = max(0.0, budget_limit - used)
        percent_used = round((used / budget_limit) * 100, 2)
    else:
        remaining = None
        percent_used = None
    status = _budget_status(used, budget_limit)

    return {
        "limit": budget_limit,
        "used": used,
        "remaining": round(remaining, 6) if remaining is not None else None,
        "percent_used": percent_used,
        "status": status,
        "warn_threshold_percent": int(_BUDGET_WARN_FRACTION * 100),
        "exceeded": status == "exceeded",
    }
4664
+
4665
+
4666
@app.get("/api/cost/timeline")
async def get_cost_timeline():
    """Cost over time: intra-run per-iteration series + per-run history.

    Two series from distinct sources:
    - current_run: built from ``metrics/efficiency/iteration-*.json`` under
      the .loki directory, ordered by each record's ``iteration`` field, with
      a running cumulative total.
    - runs: built from ``<proofs dir>/<run>/proof.json``, newest first by
      ``generated_at``.

    Budget status comes from ``_compute_budget_snapshot`` and is computed at
    read time. Nothing is written.

    Returns:
        A dict with ``current_run`` (``iterations``, ``total_usd``,
        ``cost_recorded``), ``runs``, ``runs_count``, ``project_total_usd``
        and ``budget``. ``total_usd`` is ``None`` when no iteration record
        was found; a run's ``cost_usd`` is ``None`` when its proof holds no
        usable cost.
    """
    import math  # function-scope so the block does not rely on a file-level import

    def _section(doc: dict, key: str) -> dict:
        # A proof.json whose nested section is not an object (e.g.
        # "cost": 0.12) must not crash the endpoint with AttributeError.
        value = doc.get(key)
        return value if isinstance(value, dict) else {}

    def _iter_key(record: dict) -> int:
        # Records without a usable iteration number sort as 0; the sort is
        # stable, so those keep their filename order.
        try:
            return int(record.get("iteration", 0))
        except (TypeError, ValueError):
            return 0

    loki_dir = _get_loki_dir()
    efficiency_dir = loki_dir / "metrics" / "efficiency"

    # --- current run: per-iteration series from efficiency/ -----------------
    iterations: list = []
    current_total = 0.0
    cost_recorded = False
    if efficiency_dir.exists():
        records = []
        for eff_file in sorted(efficiency_dir.glob("iteration-*.json")):
            data = _safe_json_read(eff_file, default=None)
            if not isinstance(data, dict):
                continue
            records.append(data)
        records.sort(key=_iter_key)
        cumulative = 0.0
        for data in records:
            cost_recorded = True
            inp = data.get("input_tokens", 0) or 0
            out = data.get("output_tokens", 0) or 0
            model = str(data.get("model", "sonnet")).lower()
            cost = data.get("cost_usd")
            if cost is None:
                cost = _calculate_model_cost(model, inp, out)
            else:
                try:
                    cost = float(cost)
                except (TypeError, ValueError, OverflowError):
                    cost = 0.0
                # NaN/inf would poison the cumulative line and cannot be
                # emitted as strict JSON.
                if not math.isfinite(cost):
                    cost = 0.0
            cumulative += cost
            iterations.append({
                "iteration": data.get("iteration"),
                "timestamp": data.get("timestamp"),
                "model": model,
                "phase": data.get("phase", "unknown"),
                "provider": data.get("provider"),
                "input_tokens": inp,
                "output_tokens": out,
                "cost_usd": round(cost, 6),
                "cumulative_usd": round(cumulative, 6),
            })
        current_total = cumulative

    # --- per-run history: from proofs/*/proof.json --------------------------
    runs: list = []
    project_total = 0.0
    proofs_dir = _proofs_dir()
    try:
        entries = sorted(proofs_dir.iterdir())
    except OSError:  # FileNotFoundError is a subclass of OSError
        entries = []
    for entry in entries:
        if not entry.is_dir():
            continue
        data = _safe_json_read(entry / "proof.json", default=None)
        if not isinstance(data, dict):
            continue
        run_cost = _section(data, "cost").get("usd")
        run_cost_num = None
        if run_cost is not None:
            try:
                run_cost_num = float(run_cost)
            except (TypeError, ValueError, OverflowError):
                run_cost_num = None
            # Treat a non-finite cost as "not recorded" rather than summing it.
            if run_cost_num is not None and not math.isfinite(run_cost_num):
                run_cost_num = None
            if run_cost_num is not None:
                project_total += run_cost_num
        runs.append({
            "run_id": data.get("run_id", entry.name),
            "generated_at": data.get("generated_at"),
            "model": _section(data, "provider").get("model"),
            "cost_usd": round(run_cost_num, 6) if run_cost_num is not None else None,
            "files_changed": _section(data, "files_changed").get("count"),
            "final_verdict": _section(data, "council").get("final_verdict"),
        })
    # str() keeps the comparison total if a proof stored a non-string
    # timestamp; for string timestamps the order is unchanged.
    runs.sort(key=lambda x: str(x.get("generated_at") or ""), reverse=True)

    # --- budget block (read-time status; no mutation) -----------------------
    # Budget "used" is the current run's spend; the per-project history total
    # is reported separately as project_total_usd.
    budget = _compute_budget_snapshot(loki_dir)

    return {
        "current_run": {
            "iterations": iterations,
            "total_usd": round(current_total, 6) if cost_recorded else None,
            "cost_recorded": cost_recorded,
        },
        "runs": runs,
        "runs_count": len(runs),
        "project_total_usd": round(project_total, 6) if runs else 0.0,
        "budget": budget,
    }
4781
+
4782
+
4554
4783
  # =============================================================================
4555
4784
  # Pricing API
4556
4785
  # =============================================================================
@@ -6428,6 +6657,19 @@ async def serve_favicon():
6428
6657
  return Response(status_code=404)
6429
6658
 
6430
6659
 
6660
+ # Serve the self-contained cost + observability panel (R3). Zero-build
6661
+ # standalone page that fetches /api/cost/timeline. Mirrors the proofs.html
6662
+ # pattern: works without the SPA build.
6663
@app.get("/cost", include_in_schema=False)
async def serve_cost_panel():
    """Return cost.html from STATIC_DIR, or a 404 when it cannot be served.

    A 404 is returned both when STATIC_DIR is unset/empty and when the
    directory holds no cost.html file.
    """
    if not STATIC_DIR:
        return Response(status_code=404)
    panel_file = os.path.join(STATIC_DIR, "cost.html")
    if not os.path.isfile(panel_file):
        return Response(status_code=404)
    return FileResponse(panel_file, media_type="text/html")
6671
+
6672
+
6431
6673
  # Serve index.html or standalone HTML for root
6432
6674
  @app.get("/", include_in_schema=False)
6433
6675
  async def serve_index():