loki-mode 7.10.1 → 7.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -409,6 +409,40 @@
409
409
  display: block;
410
410
  }
411
411
 
412
+ .budget-banner {
413
+ position: fixed;
414
+ top: 0;
415
+ left: 0;
416
+ right: 0;
417
+ padding: 8px 16px;
418
+ text-align: center;
419
+ font-size: 13px;
420
+ font-weight: 600;
421
+ display: none;
422
+ z-index: 1000;
423
+ color: #201515;
424
+ }
425
+
426
+ .budget-banner.show {
427
+ display: block;
428
+ }
429
+
430
+ .budget-banner.warn {
431
+ background: var(--loki-warning);
432
+ }
433
+
434
+ .budget-banner.exceeded {
435
+ background: var(--loki-red);
436
+ color: #fff;
437
+ }
438
+
439
+ .budget-banner a {
440
+ color: inherit;
441
+ text-decoration: underline;
442
+ margin-left: 10px;
443
+ font-weight: 600;
444
+ }
445
+
412
446
  /* Loading state */
413
447
  .loading {
414
448
  display: flex;
@@ -581,6 +615,15 @@
581
615
  Offline - showing cached data
582
616
  </div>
583
617
 
618
+ <!-- Budget Banner (R3 anti-surprise-cost): persistent, visible on every
619
+ page without opening the Cost panel. Amber at >=80% (warn), red at
620
+ >=100% (exceeded). Driven by the existing WebSocket budget_status push
621
+ and a polling fallback against /api/cost/timeline. -->
622
+ <div class="budget-banner" id="budget-banner" role="status" aria-live="polite">
623
+ <span id="budget-banner-text"></span>
624
+ <a href="/cost" id="budget-banner-link">View cost</a>
625
+ </div>
626
+
584
627
  <!-- Dashboard Layout -->
585
628
  <div class="dashboard-layout">
586
629
  <!-- Sidebar -->
@@ -13609,6 +13652,57 @@ document.addEventListener('DOMContentLoaded', function() {
13609
13652
  document.getElementById('offline-banner').classList.add('show');
13610
13653
  }
13611
13654
 
13655
+ // R3 budget banner: a persistent, page-wide indicator so a user running an
13656
+ // overnight job sees the 80% budget warning WITHOUT opening the Cost panel.
13657
+ // It reuses the existing WebSocket push (budget_status -> api:budget_status
13658
+ // on the shared API client) and falls back to polling /api/cost/timeline.
13659
+ (function initBudgetBanner() {
13660
+ var banner = document.getElementById('budget-banner');
13661
+ var textEl = document.getElementById('budget-banner-text');
13662
+ if (!banner || !textEl) return;
13663
+
13664
+ function renderBudget(b) {
13665
+ if (!b || (b.status !== 'warn' && b.status !== 'exceeded')) {
13666
+ banner.classList.remove('show', 'warn', 'exceeded');
13667
+ return;
13668
+ }
13669
+ // Honest copy: "Budget at 82% - hard stop at 100%."
13670
+ var pct = (b.percent_used === null || b.percent_used === undefined)
13671
+ ? null : Number(b.percent_used);
13672
+ var pctTxt = (pct === null || !isFinite(pct)) ? '' : Math.round(pct) + '%';
13673
+ var msg;
13674
+ if (b.status === 'exceeded') {
13675
+ msg = 'Budget cap reached' + (pctTxt ? ' (' + pctTxt + ')' : '') +
13676
+ '. The run is paused to prevent a surprise bill.';
13677
+ } else {
13678
+ msg = 'Budget at ' + (pctTxt || 'over 80%') + ' - hard stop at 100%.';
13679
+ }
13680
+ textEl.textContent = msg;
13681
+ banner.classList.remove('warn', 'exceeded');
13682
+ banner.classList.add('show', b.status);
13683
+ }
13684
+
13685
+ // Polling fallback (the WS push is best-effort; polling guarantees the
13686
+ // banner is correct even on a freshly opened page or after a reconnect).
13687
+ function poll() {
13688
+ fetch('/api/cost/timeline', { headers: { 'Accept': 'application/json' } })
13689
+ .then(function (r) { return r.ok ? r.json() : null; })
13690
+ .then(function (d) { if (d && d.budget) renderBudget(d.budget); })
13691
+ .catch(function () { /* offline / no endpoint: leave banner as-is */ });
13692
+ }
13693
+ poll();
13694
+ setInterval(poll, 15000);
13695
+
13696
+ // Reuse the existing shared WebSocket client for the proactive push.
13697
+ try {
13698
+ var api = LokiDashboard.getApiClient({ baseUrl: window.location.origin });
13699
+ api.addEventListener('api:budget_status', function (e) {
13700
+ renderBudget(e && e.detail);
13701
+ });
13702
+ api.connect().catch(function () { /* polling fallback still covers it */ });
13703
+ } catch (err) { /* polling fallback still covers it */ }
13704
+ })();
13705
+
13612
13706
  // Mobile menu toggle
13613
13707
  var mobileMenuBtn = document.getElementById('mobile-menu-btn');
13614
13708
  var sidebar = document.getElementById('sidebar');
@@ -2,7 +2,7 @@
2
2
 
3
3
  The flagship product of [Autonomi](https://www.autonomi.dev/). Complete installation instructions for all platforms and use cases.
4
4
 
5
- **Version:** v7.10.1
5
+ **Version:** v7.12.0
6
6
 
7
7
  ---
8
8
 
@@ -0,0 +1,147 @@
1
+ # R3: Cost + Observability Dashboard (anti-surprise-cost wedge)
2
+
3
+ Design note. Verified against live source on 2026-06-03 (v7.8.3 worktree).
4
+ No version bumps, no commits to main. This file is a design artifact for the
5
+ integrator; cherry-pick the implementation files listed at the bottom.
6
+
7
+ ## Goal
8
+
9
+ Counter the #1 competitor churn driver (surprise cost) with TRANSPARENT cost:
10
+ per-run and per-project cost USD over time, model-routing visibility, token
11
+ burn, and budget caps that WARN before the cap (at 80%) rather than surprise
12
+ the user, while preserving the existing hard-stop at 100%.
13
+
14
+ ## What already exists (reuse, do NOT duplicate)
15
+
16
+ | Surface | Location | Has | Missing for R3 |
17
+ |---|---|---|---|
18
+ | Aggregate cost | `dashboard/server.py` `GET /api/cost` (~4391) | totals, by_phase, by_model, basic budget | per-run history, time-series, warn status |
19
+ | Budget status | `dashboard/server.py` `GET /api/budget` (~4498) | limit, used, exceeded, remaining | warn-at-80% status field |
20
+ | Pricing | `dashboard/server.py` `GET /api/pricing` (~4575) | model price table | -- |
21
+ | Hard cap | `autonomy/run.sh` `check_budget_limit()` (8333) | pause + signal at >=100% | warn at 80% (no pause) |
22
+ | Bun cap | `loki-ts/src/runner/budget.ts` `checkBudgetLimit()` | parity of hard cap | warn at 80% |
23
+ | Cost lib | `autonomy/lib/efficiency_cost.py` `collect_efficiency` | sum cost_usd + tokens, honest None | (this is the shared lib to reuse) |
24
+ | Per-run proof | `.loki/proofs/<run_id>/proof.json` via proof-generator.py | run_id, generated_at, cost.usd, files_changed.count, council.final_verdict, provider.model | (source for per-run history) |
25
+ | Productivity CLI | `autonomy/loki` `cmd_metrics()` (17837) | session productivity report | dedicated cost view |
26
+ | Estimate CLI | `autonomy/loki` `cmd_plan()` | pre-run cost ESTIMATE | actuals |
27
+
28
+ ## Critical data-source fact (verified)
29
+
30
+ `autonomy/run.sh:3186` wipes `.loki/metrics/efficiency/iteration-*.json` at the
31
+ start of every run. Therefore:
32
+
33
+ - `.loki/metrics/efficiency/` only ever holds the CURRENT run's iterations. It
34
+ is the source for the INTRA-RUN time-series (per-iteration; each record now carries a
35
+ `timestamp` field, run.sh:4246).
36
+ - Per-RUN and per-PROJECT cost OVER TIME must come from
37
+ `.loki/proofs/<run_id>/proof.json` (persistent, one dir per run, carries
38
+ `cost.usd` + `generated_at` + `provider.model`). This is the real
39
+ "cost over time" series. Using efficiency/ for it would silently show only the current run.
40
+
41
+ ## Deliverables
42
+
43
+ ### 1. New endpoint: `GET /api/cost/timeline` (dashboard/server.py)
44
+
45
+ Read-only. Returns two honest series plus a budget block:
46
+
47
+ ```json
48
+ {
49
+ "current_run": {
50
+ "iterations": [
51
+ {"iteration": 1, "timestamp": "...", "model": "sonnet",
52
+ "phase": "build", "input_tokens": 1500, "output_tokens": 500,
53
+ "cost_usd": 0.05, "cumulative_usd": 0.05}
54
+ ],
55
+ "total_usd": 0.05,
56
+ "cost_recorded": true
57
+ },
58
+ "runs": [
59
+ {"run_id": "...", "generated_at": "...", "model": "sonnet",
60
+ "cost_usd": 1.84, "files_changed": 3, "final_verdict": "APPROVE"}
61
+ ],
62
+ "project_total_usd": 1.89,
63
+ "runs_count": 1,
64
+ "budget": {
65
+ "limit": 50.0, "used": 1.89, "remaining": 48.11,
66
+ "percent_used": 3.78, "status": "ok",
67
+ "warn_threshold_percent": 80, "exceeded": false
68
+ }
69
+ }
70
+ ```
71
+
72
+ - `current_run.iterations` from `.loki/metrics/efficiency/iteration-*.json`,
73
+ sorted by iteration, with a running `cumulative_usd`. Cost per record:
74
+ prefer `cost_usd`; if null, price from tokens via the EXISTING
75
+ `_calculate_model_cost` helper (do not add a new pricer).
76
+ - `runs` from `.loki/proofs/*/proof.json` (reuse `_proofs_dir` + `_safe_json_read`).
77
+ - `project_total_usd` = sum of per-run proof costs (the persistent history).
78
+ - `budget.status`: "ok" (<80%), "warn" (>=80% and <100%), "exceeded" (>=100%).
79
+ Computed at read time. No budget.json schema change (avoids the
80
+ byte-identical-JSON parity trap with run.sh heredoc / budget.ts).
81
+ - `cost_recorded` distinguishes "recorded but $0" (records exist, sum 0.0) from
82
+ "not recorded" (no records) -- mirrors the efficiency_cost.py honesty contract.
83
+
84
+ `/api/cost` and `/api/budget` are left UNCHANGED (existing frontend + tests
85
+ depend on them). The new endpoint is additive.
86
+
87
+ ### 2. Dashboard panel: `dashboard/static/cost.html`
88
+
89
+ Self-contained, zero-build, all CSS+JS inlined (mirrors `proofs.html`). Fetches
90
+ `/api/cost/timeline`. Shows: project total, budget gauge with a colored
91
+ warn/exceeded state and an explicit "warns at 80%, hard-stops at 100%" caption,
92
+ per-run history table, model-routing breakdown, and a simple inline-SVG
93
+ cumulative-cost line for the current run. Linked from "/".
94
+
95
+ ### 3. CLI cost view: `loki cost`
96
+
97
+ New `cmd_cost()` in `autonomy/loki` (no existing `cost` command -> free to add).
98
+ Wired into dispatch + help. Reads the same two sources via a single embedded
99
+ python3 block that imports `autonomy/lib/efficiency_cost.collect_efficiency`
100
+ for the current-run aggregate (REUSE, not a 5th copy), and reads proofs/ for
101
+ per-run history. Flags: `--json`, `--last N` (limit run history). Shows budget
102
+ status with the 80% warn line. Honest: prints "cost not recorded for this run"
103
+ when efficiency returns usd=None.
104
+
105
+ `loki cost` is chosen over `loki metrics --cost` because cost is the headline
106
+ R3 wedge and deserves a first-class verb; `loki metrics` stays a productivity
107
+ report. Bun parity for `loki cost` is OUT of scope for this slice (documented
108
+ gap; the bash route is canonical and the budget runtime warn below has Bun
109
+ parity, which is the load-bearing part).
110
+
111
+ ### 4. Budget warn-at-80% (runtime, both routes)
112
+
113
+ Add a non-pausing warn when crossing 80%, keep the 100% pause:
114
+ - `autonomy/run.sh` `check_budget_limit()`: when `0.80*limit <= cost < limit`,
115
+ `log_warn` + `emit_event_json budget_warning`. Does NOT pause.
116
+ - `loki-ts/src/runner/budget.ts` `checkBudgetLimit()`: same warn semantics via
117
+ the returned result (add `warn: boolean` to `CheckBudgetResult`); orchestrator
118
+ logs it. No budget.json schema change.
119
+
120
+ ## Tests
121
+
122
+ - `tests/dashboard/test_cost_timeline_endpoint.py` (pytest, `_ForceLokiDir`
123
+ pattern): empty dirs -> 200 with honest nulls; current-run aggregation +
124
+ cumulative; per-run history from proofs; budget status thresholds
125
+ (ok/warn/exceeded) at 79/80/100%; recorded-but-zero vs not-recorded; corrupt
126
+ JSON skipped; no-PII (no absolute paths leaked).
127
+ - `loki-ts/tests/runner/budget.test.ts` (extended, bun test): warn flag true in
128
+ [80%,100%), false below 80% and at/above 100% (exceeded path), no pause file
129
+ written on warn.
130
+
131
+ ## No-PII / honesty constraints
132
+
133
+ - Endpoints return only aggregates + run_ids + model names + timestamps. No file
134
+ paths, no prompt text, no token strings. proof.json is already redacted by the
135
+ R1 generator before it lands.
136
+ - `$0.00` is never fabricated: uncollected cost surfaces as null / "not recorded".
137
+
138
+ ## Files (for the integrator to cherry-pick)
139
+
140
+ - `dashboard/server.py` (add `/api/cost/timeline`)
141
+ - `dashboard/static/cost.html` (new)
142
+ - `autonomy/loki` (add `cmd_cost` + dispatch + help)
143
+ - `autonomy/run.sh` (warn-at-80% in `check_budget_limit`)
144
+ - `loki-ts/src/runner/budget.ts` (warn flag)
145
+ - `tests/dashboard/test_cost_timeline_endpoint.py` (new)
146
+ - `loki-ts/tests/runner/budget.test.ts` (extended: warn-at-80% describe block)
147
+ - `docs/R3-COST-OBSERVABILITY-DESIGN.md` (this file)
@@ -0,0 +1,137 @@
1
+ # R7: Zero-config killer first run (time-to-first-value)
2
+
3
+ Design note for the R7 release in the competitive-stickiness arc. Worktree
4
+ deliverable for the integrator to cherry-pick. NO version bumps here.
5
+
6
+ ## Goal
7
+
8
+ Convert trials to habits. The #1 acquisition-to-retention gate is the first
9
+ run. Today a blank first run is mediocre and Loki's deep RARV-C / council can
10
+ feel heavy on run 1. R7 = a frictionless first run: a user types
11
+ `loki start "<one line>"` (or `loki start` in an existing repo) and sees a
12
+ VISIBLE valuable artifact in minutes, with depth opt-in later.
13
+
14
+ Honest "fast": we do NOT fake progress. We actually shorten the path by running
15
+ a lightweight execution profile first (capped iterations, completion council
16
+ off, simple complexity tier, heavy phases off) so the first visible artifact
17
+ plus a proof-of-run land quickly. "Go deeper" = re-run plain `loki start` for
18
+ the full RARV-C depth.
19
+
20
+ ## Verified current behavior (real code, traced 2026-06-03)
21
+
22
+ - `cmd_start()` (`autonomy/loki:746`) is the unified entry. It parses args,
23
+ calls `detect_arg_type()` (`autonomy/loki:667`), then dispatches:
24
+ issue -> `cmd_run`; prd -> sets `prd_file`; empty -> no-PRD path; unknown
25
+ -> treated as a PRD path for back-compat.
26
+ - `cmd_start` ends in `_loki_new_session_exec "$RUN_SH" ...` (`autonomy/loki:1678`).
27
+ Every branch of `_loki_new_session_exec` (`autonomy/loki:167-186`) uses
28
+ `exec`, so NOTHING after that line in `cmd_start` runs. Any end-of-run
29
+ message must live in `run.sh`, not after the exec in `cmd_start`.
30
+ - `cmd_quick()` (`autonomy/loki:8849`) already synthesizes a PRD from a
31
+ one-line task and sets the lightweight profile
32
+ (`LOKI_MAX_ITERATIONS=3`, `LOKI_COMPLEXITY=simple`,
33
+ `LOKI_COUNCIL_ENABLED=false`, heavy phases off), then execs `run.sh`.
34
+ - No-PRD + generated-PRD-reuse (v7.8.1): in `run.sh` around line 11102,
35
+ `decide_generated_prd_action()` (`run.sh:4032`) returns reuse|update|generate
36
+ for the no-arg in-repo path; signature persisted by
37
+ `persist_prd_signature_if_present()` (`run.sh:4064`).
38
+ - Proof-of-run (R1): `generate_proof_of_run()` (`run.sh:4101`) wraps
39
+ `autonomy/lib/proof-generator.py`. It runs at session end (`run.sh:13312`)
40
+ on both success and failure, gated only by `LOKI_PROOF` (NOT by council
41
+ state), writing `.loki/proofs/<run_id>/{proof.json,index.html}`. Viewable
42
+ via `loki proof list` / `loki proof open <id>` (Bun-routed, `bin/loki:119`).
43
+
44
+ ### The exact gap R7 closes (traced, not assumed)
45
+
46
+ `loki start "build a todo app"` TODAY:
47
+ 1. `detect_arg_type("build a todo app")` returns `unknown` (has spaces, no
48
+ extension, not a file, not an issue ref).
49
+ 2. The PRD-not-found guard at `autonomy/loki:1243` and `:1268` only fires for
50
+ `*.md|*.json|*.txt|*.yaml|*.yml`, so a brief with spaces slips past.
51
+ 3. `prd_file="build a todo app"` is passed to `run.sh`, which fails:
52
+ `[ERROR] PRD file not found: build a todo app`.
53
+
54
+ So the one-line-brief path is broken today. R7 makes it work. This is ADDITIVE:
55
+ no existing valid input (`.md` PRD, issue ref, single-token name) changes
56
+ behavior.
57
+
58
+ ## Design (additive, no behavior change to existing inputs)
59
+
60
+ 1. `detect_arg_type`: add a `brief` return ONLY for args that contain
61
+ whitespace and match none of the file/issue/path patterns. A single-token
62
+ `unknown` arg still falls back to the PRD path (back-compat preserved).
63
+ 2. `--brief "<text>"` explicit flag: deterministic escape hatch for the rare
64
+ single-word brief (e.g. `loki start --brief "snake"`).
65
+ 3. Shared helper `synthesize_brief_prd <file> <text>`: factored so `cmd_quick`
66
+ and the new brief path write the same forward-looking PRD. The brief PRD is
67
+ written to `.loki/brief-prd-$$.md` -- DISTINCT from `.loki/generated-prd.md`
68
+ so it never pollutes the v7.8.1 generated-PRD-reuse signature logic
69
+ (generated-prd is for codebase analysis of an existing repo; brief is a
70
+ forward spec).
71
+ 4. `cmd_start` brief sub-path: set the lightweight TTFV profile (same env as
72
+ quick), synthesize the brief PRD, set `LOKI_TTFV=brief`, then continue
73
+ through the normal exec path. Upfront framing ("fast first pass") is printed
74
+ BEFORE the exec.
75
+ 5. `cmd_start` no-arg in-repo path: UNCHANGED execution (existing no-PRD +
76
+ reuse, full RARV-C depth), but set `LOKI_TTFV=repo` so the end-of-run
77
+ what-next framing appears.
78
+ 6. `run.sh` end-of-session: after proof generation, when `LOKI_TTFV` is set and
79
+ stdout is a TTY, call `print_ttfv_next_steps <mode> <result>`. The wording
80
+ BRANCHES on mode so the message always matches what actually ran:
81
+ - `brief`: lightweight first pass, council off; proof has diffs/cost/time
82
+ (NO council verdicts, because the council was disabled).
83
+ - `repo`: full-depth codebase analysis, council on; proof has
84
+ diffs/cost/time/council verdicts.
85
+ Both point at `loki proof list` / `loki proof open` (the visible artifact)
86
+ and the depth opt-in. Gated so it is silent in CI / pipes and never fires
87
+ for normal PRD runs. Factored into `print_ttfv_next_steps` so it is
88
+ unit-testable.
89
+
90
+ Honesty note: the `brief` message intentionally does NOT advertise "council
91
+ verdicts" because brief mode runs with the council off (`_collect_council` in
92
+ proof-generator.py finds no council state, so that proof section is blank on the
93
+ brief path). The `repo` message claims verdicts because the full-depth path runs
94
+ the council. This keeps the end-of-run summary truthful per the no-fabrication
95
+ rule.
96
+
97
+ ### Why fast is honest
98
+
99
+ The brief path uses the same lightweight profile `cmd_quick` already ships:
100
+ 3 iterations max, council off, simple tier, heavy phases (perf, a11y,
101
+ regression, UAT, web-research) off. That genuinely shortens the path to first
102
+ visible value. We do not print fake progress or claim work that did not happen;
103
+ the proof-of-run is generated from real `.loki/` state. Depth is opt-in: the
104
+ end-of-run message tells the user to re-run plain `loki start` (or
105
+ `loki start <prd.md>`) for the full council-gated build.
106
+
107
+ ## Parity (bash + Bun)
108
+
109
+ `loki start` and `loki quick` are NOT in the Bun shim allowlist
110
+ (`bin/loki:119`), so dispatch is bash-only by design; this change is bash-only
111
+ for the CLI surface. The runtime pieces it reuses are already shared across
112
+ routes: `proof-generator.py` (one implementation, both routes) and the no-PRD /
113
+ generated-PRD-reuse path in `run.sh` (both routes source run.sh). No Bun CLI
114
+ change is required for parity.
115
+
116
+ ## Files
117
+
118
+ - `autonomy/loki`: `detect_arg_type` brief return; `--brief` flag;
119
+ `synthesize_brief_prd` helper; `cmd_quick` refactor to use it; `cmd_start`
120
+ brief sub-path + `LOKI_TTFV` wiring; help text.
121
+ - `autonomy/run.sh`: end-of-session TTFV what-next block.
122
+ - `tests/cli/test_zero_config_first_run.sh`: new test suite.
123
+
124
+ ## Tests (no paid runs; mock via early exit)
125
+
126
+ Following `tests/cli/test_start_run_unified.sh`: extract `detect_arg_type` and
127
+ `synthesize_brief_prd` in a subshell and assert on them; force `cmd_start` to
128
+ exit before `run.sh` boots via `--provider nonexistent-provider`.
129
+
130
+ - `detect_arg_type("build a todo app")` = `brief`; single tokens still `unknown`;
131
+ `.md` still `prd`; issue refs still `issue`; empty still `empty`.
132
+ - `synthesize_brief_prd` writes a PRD containing the brief text and TTFV markers.
133
+ - `loki start "<brief>"` enters the brief path (lightweight env, not
134
+ "PRD file not found").
135
+ - `loki start --brief "<one word>"` works.
136
+ - existing-repo no-arg path still routes to no-PRD (unchanged).
137
+ - `loki start <prd.md>` (real PRD) still routes to PRD mode (no regression).