loki-mode 7.10.1 → 7.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SKILL.md +2 -2
- package/VERSION +1 -1
- package/autonomy/loki +297 -0
- package/autonomy/run.sh +22 -0
- package/dashboard/__init__.py +1 -1
- package/dashboard/server.py +242 -0
- package/dashboard/static/cost.html +274 -0
- package/dashboard/static/index.html +94 -0
- package/docs/INSTALLATION.md +1 -1
- package/docs/R3-COST-OBSERVABILITY-DESIGN.md +147 -0
- package/loki-ts/dist/loki.js +144 -144
- package/mcp/__init__.py +1 -1
- package/package.json +1 -1
package/SKILL.md
CHANGED
|
@@ -3,7 +3,7 @@ name: loki-mode
|
|
|
3
3
|
description: Autonomous spec-to-product system. Triggers on "Loki Mode". Takes a spec (PRD, GitHub issue, OpenAPI doc, etc.) to deployed product via the RARV-C closure loop, with minimal human intervention. Provider-agnostic. Requires --dangerously-skip-permissions flag.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
# Loki Mode v7.10.1
|
|
6
|
+
# Loki Mode v7.11.0
|
|
7
7
|
|
|
8
8
|
**You are an autonomous agent. You make decisions. You do not ask questions. You do not stop.**
|
|
9
9
|
|
|
@@ -381,4 +381,4 @@ See `CHANGELOG.md` entries [7.5.7], [7.5.8], [7.5.13] for the per-fix list and r
|
|
|
381
381
|
|
|
382
382
|
---
|
|
383
383
|
|
|
384
|
-
**v7.10.1 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
|
|
384
|
+
**v7.11.0 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
|
package/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
7.10.1
|
|
1
|
+
7.11.0
|
package/autonomy/loki
CHANGED
|
@@ -543,6 +543,7 @@ show_help() {
|
|
|
543
543
|
echo " optimize Optimize prompts based on session history"
|
|
544
544
|
echo " enterprise Enterprise feature management (tokens, OIDC)"
|
|
545
545
|
echo " metrics [opts] Session productivity report (--json, --last N, --save, --share)"
|
|
546
|
+
echo " cost [opts] Transparent cost view: per-run/project spend + budget (--json, --last N)"
|
|
546
547
|
echo " dogfood Show self-development statistics"
|
|
547
548
|
echo " secrets [cmd] API key status and validation (status|validate)"
|
|
548
549
|
echo " reset [target] Reset session state (all|retries|failed)"
|
|
@@ -13034,6 +13035,9 @@ main() {
|
|
|
13034
13035
|
metrics)
|
|
13035
13036
|
cmd_metrics "$@"
|
|
13036
13037
|
;;
|
|
13038
|
+
cost)
|
|
13039
|
+
cmd_cost "$@"
|
|
13040
|
+
;;
|
|
13037
13041
|
syslog)
|
|
13038
13042
|
cmd_syslog "$@"
|
|
13039
13043
|
;;
|
|
@@ -17833,6 +17837,299 @@ cmd_syslog() {
|
|
|
17833
17837
|
esac
|
|
17834
17838
|
}
|
|
17835
17839
|
|
|
17840
|
+
# Transparent cost view (R3): per-run + per-project spend, model routing, and
# budget status with the 80% warn line. Reuses efficiency_cost.collect_efficiency
# for the current-run aggregate (single source of truth) and reads .loki/proofs/
# for persistent per-run history. Honest: prints "not recorded" when cost was
# never collected, never a fabricated $0.00.
#
# Usage:   cmd_cost [--json] [--last N | --last=N] [--help|-h]
# Reads:   LOKI_DIR (default ".loki"), SKILL_DIR, LOKI_BUDGET_LIMIT
# Exits 1: unknown option, --last without a value, or python3 not installed.
cmd_cost() {
    local show_json=false
    local last_n=0

    while [[ $# -gt 0 ]]; do
        case "$1" in
            --help|-h)
                echo -e "${BOLD}loki cost${NC} - Transparent cost and budget view"
                echo ""
                echo "Usage: loki cost [options]"
                echo ""
                echo "Shows the current run's spend (from .loki/metrics/efficiency/),"
                echo "per-run history (from .loki/proofs/), model routing by spend, and"
                echo "budget status. Budgets warn at 80% and hard-stop at 100%."
                echo ""
                echo "Options:"
                echo " --json Machine-readable JSON output"
                echo " --last N Show only the last N runs in history (default: all)"
                echo " --help, -h Show this help"
                echo ""
                echo "Examples:"
                echo " loki cost # Cost summary + budget status"
                echo " loki cost --json # Machine-readable output"
                echo " loki cost --last 10 # Last 10 runs of history"
                echo ""
                echo "Budget cap: set LOKI_BUDGET_LIMIT (USD). Warns at 80%, stops at 100%."
                exit 0
                ;;
            --json) show_json=true; shift ;;
            --last)
                # `shift 2` with only one positional left fails WITHOUT shifting,
                # which would re-match --last forever. Reject the missing value.
                if [[ $# -lt 2 ]]; then
                    echo -e "${RED}Option --last requires a value${NC}"
                    echo "Run 'loki cost --help' for usage."
                    exit 1
                fi
                last_n="${2:-0}"; shift 2 ;;
            --last=*) last_n="${1#*=}"; shift ;;
            *) echo -e "${RED}Unknown option: $1${NC}"; echo "Run 'loki cost --help' for usage."; exit 1 ;;
        esac
    done

    local loki_dir="${LOKI_DIR:-.loki}"

    if ! command -v python3 &>/dev/null; then
        echo -e "${RED}python3 is required for the cost view${NC}"
        exit 1
    fi

    # All inputs are handed to the embedded script via environment variables;
    # the heredoc delimiter is quoted so the shell expands nothing inside it.
    LOKI_DIR="$loki_dir" \
    LOKI_SKILL_DIR="$SKILL_DIR" \
    COST_JSON="$show_json" \
    COST_LAST_N="$last_n" \
    COST_BUDGET_LIMIT="${LOKI_BUDGET_LIMIT:-}" \
    python3 << 'COST_SCRIPT'
import json
import math
import os
import sys

loki_dir = os.environ.get("LOKI_DIR", ".loki")
skill_dir = os.environ.get("LOKI_SKILL_DIR", "")
show_json = os.environ.get("COST_JSON", "false") == "true"
try:
    last_n = int(os.environ.get("COST_LAST_N", "0") or "0")
except ValueError:
    # A non-numeric --last value falls back to "no limit".
    last_n = 0
budget_limit_env = os.environ.get("COST_BUDGET_LIMIT", "").strip()

# ANSI (suppressed under --json / non-tty)
use_color = (not show_json) and sys.stdout.isatty()
BOLD = "\033[1m" if use_color else ""
DIM = "\033[2m" if use_color else ""
CYAN = "\033[36m" if use_color else ""
GREEN = "\033[32m" if use_color else ""
YELLOW = "\033[33m" if use_color else ""
RED = "\033[31m" if use_color else ""
NC = "\033[0m" if use_color else ""

# Reuse the shared cost lib (single source of truth). Never duplicate the
# cost-summing logic; collect_efficiency returns usd=None when nothing was
# recorded, which we surface honestly.
collect_efficiency = None
if skill_dir:
    lib_dir = os.path.join(skill_dir, "autonomy", "lib")
    if lib_dir not in sys.path:
        sys.path.insert(0, lib_dir)
    try:
        from efficiency_cost import collect_efficiency as _ce
        collect_efficiency = _ce
    except Exception:
        # Best-effort: a missing/broken lib degrades to "unavailable" below.
        collect_efficiency = None


def _fmt_usd(v):
    """Format a USD amount with 2 to 4 decimals, or "not recorded" if absent."""
    if v is None:
        return "not recorded"
    try:
        n = float(v)
    except (TypeError, ValueError):
        return "not recorded"
    # Render 4 decimals, trim trailing zeros, then pad back to at least 2.
    s = ("%.4f" % n).rstrip("0").rstrip(".")
    if "." not in s:
        s += ".00"
    elif len(s.split(".")[1]) == 1:
        s += "0"
    return "$" + s


def _load_json(path):
    """Parse a JSON file, closing the handle deterministically."""
    with open(path, encoding="utf-8") as fh:
        return json.load(fh)


# --- current run aggregate (reuse collect_efficiency, single source) -----
# We do NOT re-implement the cost sum here: efficiency_cost.collect_efficiency
# is the single source of truth (shared with the proof generator and the R2
# benchmark adapters). On a broken install where the lib is missing, we degrade
# honestly rather than ship a divergent 5th copy of the cost math.
current_cost = None
current_model = ""
lib_available = collect_efficiency is not None
if lib_available:
    try:
        cost_dict, current_model = collect_efficiency(loki_dir)
        current_cost = cost_dict.get("usd")
    except Exception:
        current_cost = None

# --- per-run history from .loki/proofs/ ----------------------------------
runs = []
project_total = 0.0
proofs_dir = os.path.join(loki_dir, "proofs")
if os.path.isdir(proofs_dir):
    for name in sorted(os.listdir(proofs_dir)):
        run_dir = os.path.join(proofs_dir, name)
        proof_json = os.path.join(run_dir, "proof.json")
        if not os.path.isfile(proof_json):
            continue
        try:
            d = _load_json(proof_json)
        except Exception:
            # Unreadable or malformed proof: skip this run, keep the rest.
            continue
        if not isinstance(d, dict):
            continue
        run_cost = (d.get("cost") or {}).get("usd")
        run_cost_num = None
        if run_cost is not None:
            try:
                run_cost_num = float(run_cost)
                project_total += run_cost_num
            except (TypeError, ValueError):
                run_cost_num = None
        runs.append({
            "run_id": d.get("run_id", name),
            "generated_at": d.get("generated_at"),
            "model": (d.get("provider") or {}).get("model"),
            "cost_usd": run_cost_num,
            "files_changed": (d.get("files_changed") or {}).get("count"),
            "final_verdict": (d.get("council") or {}).get("final_verdict"),
        })
# Newest first; runs without a timestamp sort last.
runs.sort(key=lambda x: (x.get("generated_at") or ""), reverse=True)
if last_n > 0:
    # Only the listed runs are trimmed; project_total still covers every run.
    runs = runs[:last_n]

# --- budget status (read-time; warn at 80%, exceeded at 100%) ------------
budget_limit = None
budget_file = os.path.join(loki_dir, "metrics", "budget.json")
if os.path.isfile(budget_file):
    try:
        bd = _load_json(budget_file)
        budget_limit = bd.get("limit") or bd.get("budget_limit")
    except Exception:
        budget_limit = None
if budget_limit is None and budget_limit_env:
    try:
        budget_limit = float(budget_limit_env)
    except ValueError:
        budget_limit = None
if budget_limit is not None:
    try:
        budget_limit = float(budget_limit)
    except (TypeError, ValueError):
        budget_limit = None
# nan/inf are not a usable cap and are not valid JSON; treat them as "no cap".
if budget_limit is not None and not math.isfinite(budget_limit):
    budget_limit = None

budget_used = current_cost if isinstance(current_cost, (int, float)) else 0.0
status = "none"
percent_used = None
remaining = None
if budget_limit is not None and budget_limit > 0:
    percent_used = round(budget_used / budget_limit * 100, 2)
    remaining = max(0.0, budget_limit - budget_used)
    if budget_used >= budget_limit:
        status = "exceeded"
    elif budget_used >= 0.80 * budget_limit:
        status = "warn"
    else:
        status = "ok"

# --- model routing by spend (from run history) ---------------------------
by_model = {}
for r in runs:
    c = r.get("cost_usd")
    if c is None:
        continue
    m = r.get("model") or "unknown"
    by_model[m] = by_model.get(m, 0.0) + c

if show_json:
    out = {
        "current_run": {
            "cost_usd": current_cost,
            "model": current_model or None,
            "cost_recorded": current_cost is not None,
            "cost_lib_available": lib_available,
        },
        "runs": runs,
        "runs_count": len(runs),
        "project_total_usd": round(project_total, 6) if runs else 0.0,
        "by_model": {k: round(v, 6) for k, v in by_model.items()},
        "budget": {
            "limit": budget_limit,
            "used": round(budget_used, 6),
            "remaining": round(remaining, 6) if remaining is not None else None,
            "percent_used": percent_used,
            "status": status,
            "warn_threshold_percent": 80,
            "exceeded": status == "exceeded",
        },
    }
    print(json.dumps(out, indent=2))
    sys.exit(0)

# --- human-readable ------------------------------------------------------
print()
print(BOLD + "Loki Cost" + NC)
print(DIM + "=" * 50 + NC)

print()
print(CYAN + "Current run" + NC)
if not lib_available:
    print(DIM + " Cost library unavailable (efficiency_cost.py not found)." + NC)
    print(DIM + " Current-run spend cannot be computed on this install." + NC)
elif current_cost is None:
    print(" Cost not recorded for this run.")
else:
    mtxt = (" (" + current_model + ")") if current_model else ""
    print(" Spend: " + BOLD + _fmt_usd(current_cost) + NC + mtxt)

print()
print(CYAN + "Project history" + NC)
print(" Runs recorded: " + str(len(runs)))
print(" Total spend: " + BOLD + (_fmt_usd(project_total) if runs else "$0.00") + NC)

if by_model:
    print()
    print(CYAN + "Model routing (by spend)" + NC)
    # "or 1.0" guards the percentage division when every run cost 0.
    total_m = sum(by_model.values()) or 1.0
    for m in sorted(by_model, key=lambda k: by_model[k], reverse=True):
        v = by_model[m]
        pct = v / total_m * 100
        # 20-cell bar, one cell per 5%.
        bar_len = int(pct / 5)
        bar = "#" * bar_len + "." * (20 - bar_len)
        print(" {:<16} {}{:>9} ({:4.1f}%) [{}]".format(m[:16], "", _fmt_usd(v), pct, bar))

print()
print(CYAN + "Budget" + NC)
if budget_limit is None:
    print(" No cap set. Set LOKI_BUDGET_LIMIT (USD) to cap spend.")
    print(DIM + " When set, Loki warns at 80% and hard-stops at 100%." + NC)
else:
    col = GREEN
    if status == "warn":
        col = YELLOW
    elif status == "exceeded":
        col = RED
    print(" Cap: " + _fmt_usd(budget_limit))
    print(" Used: " + _fmt_usd(budget_used) + " (" + col + str(percent_used) + "%" + NC + ")")
    print(" Remaining: " + _fmt_usd(remaining))
    print(" Status: " + col + BOLD + status.upper() + NC)
    if status == "warn":
        print(YELLOW + " Warning: at or above 80% of cap. Run continues; hard-stop at 100%." + NC)
    elif status == "exceeded":
        print(RED + " Cap reached. The run is paused to prevent a surprise bill." + NC)

if runs:
    print()
    print(CYAN + "Recent runs" + NC)
    print(DIM + " {:<28} {:<10} {:>9} {}".format("Run", "Model", "Cost", "Verdict") + NC)
    # Without --last the table is capped at 10 rows.
    for r in runs[:max(last_n, 10) if last_n else 10]:
        rid = str(r.get("run_id") or "")[:28]
        mdl = str(r.get("model") or "")[:10]
        cst = _fmt_usd(r.get("cost_usd"))
        vrd = str(r.get("final_verdict") or "")
        print(" {:<28} {:<10} {:>9} {}".format(rid, mdl, cst, vrd))

print()
print(DIM + "Dashboard cost panel: /cost | JSON: loki cost --json" + NC)
print()
COST_SCRIPT
}
|
|
18132
|
+
|
|
17836
18133
|
# Fetch and display Prometheus metrics from dashboard
|
|
17837
18134
|
cmd_metrics() {
|
|
17838
18135
|
local show_json=false
|
package/autonomy/run.sh
CHANGED
|
@@ -8416,6 +8416,28 @@ BUDGETUPD_EOF
|
|
|
8416
8416
|
BUDGETUPD_EOF
|
|
8417
8417
|
fi
|
|
8418
8418
|
|
|
8419
|
+
# Anti-surprise-cost warn (R3): when spend crosses 80% of the cap but is
|
|
8420
|
+
# still under 100%, log a warning and emit an event. Does NOT pause: the
|
|
8421
|
+
# warn is the transparency the user wants BEFORE the hard cap stops them.
|
|
8422
|
+
# Read-time classification only; budget.json schema is unchanged.
|
|
8423
|
+
local warn
|
|
8424
|
+
warn=$(python3 -c "
|
|
8425
|
+
import sys
|
|
8426
|
+
try:
|
|
8427
|
+
cost = float(sys.argv[1]); limit = float(sys.argv[2])
|
|
8428
|
+
print(1 if (limit > 0 and 0.80 * limit <= cost < limit) else 0)
|
|
8429
|
+
except (ValueError, IndexError):
|
|
8430
|
+
print(0)
|
|
8431
|
+
" "$current_cost" "$BUDGET_LIMIT" 2>/dev/null || echo "0")
|
|
8432
|
+
if [[ "$warn" == "1" ]]; then
|
|
8433
|
+
log_warn "BUDGET WARNING: \$${current_cost} is at or above 80% of cap \$${BUDGET_LIMIT}. Run continues; hard-stop at 100%."
|
|
8434
|
+
emit_event_json "budget_warning" \
|
|
8435
|
+
"limit=${BUDGET_LIMIT}" \
|
|
8436
|
+
"current=${current_cost}" \
|
|
8437
|
+
"threshold_percent=80" \
|
|
8438
|
+
"iteration=${ITERATION_COUNT:-0}"
|
|
8439
|
+
fi
|
|
8440
|
+
|
|
8419
8441
|
return 1
|
|
8420
8442
|
}
|
|
8421
8443
|
|
package/dashboard/__init__.py
CHANGED
package/dashboard/server.py
CHANGED
|
@@ -459,6 +459,7 @@ async def _push_loki_state_loop() -> None:
|
|
|
459
459
|
"""
|
|
460
460
|
last_mtime: float = 0.0
|
|
461
461
|
_last_skill_hash: str = "" # Track skill-session state changes
|
|
462
|
+
_last_budget_status: str = "" # Track budget-status transitions (R3)
|
|
462
463
|
while True:
|
|
463
464
|
try:
|
|
464
465
|
if not manager.active_connections:
|
|
@@ -469,6 +470,26 @@ async def _push_loki_state_loop() -> None:
|
|
|
469
470
|
state_file = loki_dir / "dashboard-state.json"
|
|
470
471
|
_session_file = loki_dir / "session.json"
|
|
471
472
|
|
|
473
|
+
# R3 anti-surprise-cost: proactively push a budget_status message
|
|
474
|
+
# when spend crosses a threshold (ok -> warn -> exceeded), so a user
|
|
475
|
+
# who is not watching the terminal sees the 80% warning in any open
|
|
476
|
+
# dashboard page BEFORE the hard stop at 100%. Reuses the existing
|
|
477
|
+
# WebSocket broadcast path (manager.broadcast); no second channel.
|
|
478
|
+
# Sent on transition (independent of the dashboard-state.json mtime
|
|
479
|
+
# gate) because budget can cross 80% while that file is unchanged.
|
|
480
|
+
try:
|
|
481
|
+
_budget = _compute_budget_snapshot(loki_dir)
|
|
482
|
+
_bstatus = _budget.get("status", "none")
|
|
483
|
+
if _bstatus in ("warn", "exceeded") and _bstatus != _last_budget_status:
|
|
484
|
+
await manager.broadcast({
|
|
485
|
+
"type": "budget_status",
|
|
486
|
+
"data": _budget,
|
|
487
|
+
})
|
|
488
|
+
# Track every status so a return to ok/none re-arms the warn push.
|
|
489
|
+
_last_budget_status = _bstatus
|
|
490
|
+
except (OSError, ValueError, KeyError):
|
|
491
|
+
pass
|
|
492
|
+
|
|
472
493
|
_broadcast_sent = False
|
|
473
494
|
|
|
474
495
|
if state_file.exists():
|
|
@@ -4551,6 +4572,214 @@ async def get_budget():
|
|
|
4551
4572
|
}
|
|
4552
4573
|
|
|
4553
4574
|
|
|
4575
|
+
# Budget warn threshold: surface a "warn" status before the hard cap so users
|
|
4576
|
+
# are not surprised by a bill. Matches the runtime warn in run.sh
|
|
4577
|
+
# check_budget_limit() and budget.ts (warn at 80%, hard-stop at 100%).
|
|
4578
|
+
_BUDGET_WARN_FRACTION = 0.80
|
|
4579
|
+
|
|
4580
|
+
|
|
4581
|
+
def _budget_status(used: float, limit: Optional[float]) -> str:
|
|
4582
|
+
"""Classify budget usage. Read-time only; no state mutation.
|
|
4583
|
+
|
|
4584
|
+
Returns one of: "none" (no limit set), "ok" (<80%), "warn" (>=80% and
|
|
4585
|
+
<100%), "exceeded" (>=100%). The warn band is the anti-surprise wedge:
|
|
4586
|
+
the user sees it BEFORE the hard cap pauses the run.
|
|
4587
|
+
"""
|
|
4588
|
+
if limit is None or limit <= 0:
|
|
4589
|
+
return "none"
|
|
4590
|
+
if used >= limit:
|
|
4591
|
+
return "exceeded"
|
|
4592
|
+
if used >= _BUDGET_WARN_FRACTION * limit:
|
|
4593
|
+
return "warn"
|
|
4594
|
+
return "ok"
|
|
4595
|
+
|
|
4596
|
+
|
|
4597
|
+
def _compute_budget_snapshot(loki_dir: _Path) -> dict:
    """Read-time budget snapshot shared by /api/cost/timeline and the WS push.

    Single source of truth so the proactive WebSocket broadcast and the pull
    endpoint never disagree. "used" is the sum over the
    ``metrics/efficiency/iteration-*.json`` records under ``loki_dir``. The cap
    comes from ``metrics/budget.json`` ("limit", else "budget_limit"), falling
    back to the LOKI_BUDGET_LIMIT env var. No state is mutated.

    Args:
        loki_dir: Root of the .loki state directory.

    Returns:
        A dict with keys "limit", "used", "remaining", "percent_used",
        "status", "warn_threshold_percent" and "exceeded". "remaining" and
        "percent_used" are None when no positive limit is configured.
    """
    # Function-scope import so this block does not depend on the module's
    # import header.
    import math

    efficiency_dir = loki_dir / "metrics" / "efficiency"
    budget_file = loki_dir / "metrics" / "budget.json"

    current_total = 0.0
    if efficiency_dir.exists():
        for eff_file in sorted(efficiency_dir.glob("iteration-*.json")):
            data = _safe_json_read(eff_file, default=None)
            if not isinstance(data, dict):
                continue
            inp = data.get("input_tokens", 0) or 0
            out = data.get("output_tokens", 0) or 0
            model = str(data.get("model", "sonnet")).lower()
            cost = data.get("cost_usd")
            if cost is None:
                # No recorded cost: derive it from token counts and model.
                cost = _calculate_model_cost(model, inp, out)
            else:
                try:
                    cost = float(cost)
                except (TypeError, ValueError):
                    cost = 0.0
                # A recorded nan/inf would poison the running total and make
                # the snapshot non-JSON-compliant; treat it like any other
                # unparsable cost.
                if not math.isfinite(cost):
                    cost = 0.0
            current_total += cost

    budget_limit = None
    if budget_file.exists():
        bdata = _safe_json_read(budget_file, default=None)
        if isinstance(bdata, dict):
            budget_limit = bdata.get("limit") or bdata.get("budget_limit")
    if budget_limit is None:
        env_limit = os.environ.get("LOKI_BUDGET_LIMIT", "")
        if env_limit:
            try:
                budget_limit = float(env_limit)
            except ValueError:
                budget_limit = None
    if budget_limit is not None:
        try:
            budget_limit = float(budget_limit)
        except (TypeError, ValueError):
            budget_limit = None
    # float("nan") / float("inf") parse successfully (e.g. LOKI_BUDGET_LIMIT=nan)
    # but are not a usable cap and are not valid JSON values; report "no cap".
    if budget_limit is not None and not math.isfinite(budget_limit):
        budget_limit = None

    used = round(current_total, 6)
    if budget_limit is not None and budget_limit > 0:
        remaining = max(0.0, budget_limit - used)
        percent_used = round((used / budget_limit) * 100, 2)
    else:
        remaining = None
        percent_used = None
    status = _budget_status(used, budget_limit)

    return {
        "limit": budget_limit,
        "used": used,
        "remaining": round(remaining, 6) if remaining is not None else None,
        "percent_used": percent_used,
        "status": status,
        "warn_threshold_percent": int(_BUDGET_WARN_FRACTION * 100),
        "exceeded": status == "exceeded",
    }
|
|
4664
|
+
|
|
4665
|
+
|
|
4666
|
+
@app.get("/api/cost/timeline")
async def get_cost_timeline():
    """Return cost over time: the current run's iterations plus run history.

    The payload combines two series from distinct sources (see
    docs/R3-COST-OBSERVABILITY-DESIGN.md):
      - current_run: one entry per metrics/efficiency/iteration-*.json record,
        ordered by its numeric "iteration" field, each carrying its own cost
        and the cumulative cost so far.
      - runs: one entry per <proofs dir>/<run_id>/proof.json, newest first by
        "generated_at".

    The budget block comes from _compute_budget_snapshot, so this endpoint and
    the WebSocket push report the same classification. When no iteration
    record exists, current_run.cost_recorded is False and current_run.total_usd
    is None instead of 0; project_total_usd is 0.0 when there are no runs.
    """
    loki_dir = _get_loki_dir()
    efficiency_dir = loki_dir / "metrics" / "efficiency"

    def _iteration_order(record):
        # Records with a missing or non-integer "iteration" sort as 0; the
        # sort is stable, so such records keep their filename order.
        try:
            return int(record.get("iteration", 0))
        except (TypeError, ValueError):
            return 0

    # --- current run: per-iteration series from efficiency/ -----------------
    iterations: list = []
    running_total = 0.0
    cost_recorded = False
    if efficiency_dir.exists():
        loaded = [
            _safe_json_read(path, default=None)
            for path in sorted(efficiency_dir.glob("iteration-*.json"))
        ]
        records = sorted(
            (rec for rec in loaded if isinstance(rec, dict)),
            key=_iteration_order,
        )
        cost_recorded = bool(records)
        for rec in records:
            tokens_in = rec.get("input_tokens", 0) or 0
            tokens_out = rec.get("output_tokens", 0) or 0
            model = str(rec.get("model", "sonnet")).lower()
            raw_cost = rec.get("cost_usd")
            if raw_cost is not None:
                try:
                    cost = float(raw_cost)
                except (TypeError, ValueError):
                    cost = 0.0
            else:
                # No recorded cost: derive it from token counts and model.
                cost = _calculate_model_cost(model, tokens_in, tokens_out)
            running_total += cost
            iterations.append({
                "iteration": rec.get("iteration"),
                "timestamp": rec.get("timestamp"),
                "model": model,
                "phase": rec.get("phase", "unknown"),
                "provider": rec.get("provider"),
                "input_tokens": tokens_in,
                "output_tokens": tokens_out,
                "cost_usd": round(cost, 6),
                "cumulative_usd": round(running_total, 6),
            })

    # --- per-run history: from the proofs dir's */proof.json ----------------
    runs: list = []
    project_total = 0.0
    proofs_dir = _proofs_dir()
    try:
        run_dirs = sorted(proofs_dir.iterdir())
    except (OSError, FileNotFoundError):
        run_dirs = []
    for run_dir in run_dirs:
        if not run_dir.is_dir():
            continue
        proof = _safe_json_read(run_dir / "proof.json", default=None)
        if not isinstance(proof, dict):
            continue
        raw_run_cost = (proof.get("cost") or {}).get("usd")
        run_cost_num = None
        if raw_run_cost is not None:
            try:
                run_cost_num = float(raw_run_cost)
                project_total += run_cost_num
            except (TypeError, ValueError):
                run_cost_num = None
        if run_cost_num is None:
            rounded_cost = None
        else:
            rounded_cost = round(run_cost_num, 6)
        runs.append({
            "run_id": proof.get("run_id", run_dir.name),
            "generated_at": proof.get("generated_at"),
            "model": (proof.get("provider") or {}).get("model"),
            "cost_usd": rounded_cost,
            "files_changed": (proof.get("files_changed") or {}).get("count"),
            "final_verdict": (proof.get("council") or {}).get("final_verdict"),
        })
    # Newest first; runs without a timestamp sort last.
    runs.sort(key=lambda run: (run.get("generated_at") or ""), reverse=True)

    # --- budget block (read-time status; no mutation) -----------------------
    # Budget "used" reflects the current run only; the per-project history
    # total is reported separately as project_total_usd.
    budget = _compute_budget_snapshot(loki_dir)

    if cost_recorded:
        current_total_usd = round(running_total, 6)
    else:
        current_total_usd = None

    return {
        "current_run": {
            "iterations": iterations,
            "total_usd": current_total_usd,
            "cost_recorded": cost_recorded,
        },
        "runs": runs,
        "runs_count": len(runs),
        "project_total_usd": round(project_total, 6) if runs else 0.0,
        "budget": budget,
    }
|
|
4781
|
+
|
|
4782
|
+
|
|
4554
4783
|
# =============================================================================
|
|
4555
4784
|
# Pricing API
|
|
4556
4785
|
# =============================================================================
|
|
@@ -6428,6 +6657,19 @@ async def serve_favicon():
|
|
|
6428
6657
|
return Response(status_code=404)
|
|
6429
6658
|
|
|
6430
6659
|
|
|
6660
|
+
# Serve the self-contained cost + observability panel (R3). Zero-build
|
|
6661
|
+
# standalone page that fetches /api/cost/timeline. Mirrors the proofs.html
|
|
6662
|
+
# pattern: works without the SPA build.
|
|
6663
|
+
@app.get("/cost", include_in_schema=False)
async def serve_cost_panel():
    """Serve static cost.html for the cost panel, or 404 if it is unavailable.

    Responds 404 when STATIC_DIR is unset/empty or when cost.html is not a
    regular file inside it.
    """
    if not STATIC_DIR:
        return Response(status_code=404)
    panel_path = os.path.join(STATIC_DIR, "cost.html")
    if not os.path.isfile(panel_path):
        return Response(status_code=404)
    return FileResponse(panel_path, media_type="text/html")
|
|
6671
|
+
|
|
6672
|
+
|
|
6431
6673
|
# Serve index.html or standalone HTML for root
|
|
6432
6674
|
@app.get("/", include_in_schema=False)
|
|
6433
6675
|
async def serve_index():
|