npm - loki-mode - Versions diffs - 7.10.1 → 7.12.0 - Mend

loki-mode 7.10.1 → 7.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/SKILL.md +2 -2
package/VERSION +1 -1
package/autonomy/loki +449 -12
package/autonomy/run.sh +94 -0
package/dashboard/__init__.py +1 -1
package/dashboard/server.py +242 -0
package/dashboard/static/cost.html +274 -0
package/dashboard/static/index.html +94 -0
package/docs/INSTALLATION.md +1 -1
package/docs/R3-COST-OBSERVABILITY-DESIGN.md +147 -0
package/docs/R7-ZERO-CONFIG-FIRST-RUN-PLAN.md +137 -0
package/loki-ts/dist/loki.js +144 -144
package/mcp/__init__.py +1 -1
package/package.json +1 -1

package/dashboard/static/index.html CHANGED Viewed

@@ -409,6 +409,40 @@
       display: block;
     }
+    .budget-banner { /* Budget banner strip: pinned across the top edge of the viewport. */
+      position: fixed;
+      top: 0;
+      left: 0;
+      right: 0;
+      padding: 8px 16px;
+      text-align: center;
+      font-size: 13px;
+      font-weight: 600;
+      display: none; /* hidden by default; the .show modifier below reveals it */
+      z-index: 1000;
+      color: #201515; /* default dark text; .exceeded overrides it with white */
+    }
+    .budget-banner.show { /* visibility toggle */
+      display: block;
+    }
+    .budget-banner.warn { /* warn state; --loki-warning is defined outside this hunk */
+      background: var(--loki-warning);
+    }
+    .budget-banner.exceeded { /* exceeded state; --loki-red is defined outside this hunk */
+      background: var(--loki-red);
+      color: #fff;
+    }
+    .budget-banner a { /* link inside the banner takes the banner's current text color */
+      color: inherit;
+      text-decoration: underline;
+      margin-left: 10px;
+      font-weight: 600;
+    }
     /* Loading state */
     .loading {
       display: flex;
@@ -581,6 +615,15 @@
     Offline - showing cached data
   </div>
+  <!-- Budget Banner (R3 anti-surprise-cost): persistent, visible on every
+       page without opening the Cost panel. Amber at >=80% (warn), red at
+       >=100% (exceeded). Driven by the existing WebSocket budget_status push
+       and a polling fallback against /api/cost/timeline. -->
+  <div class="budget-banner" id="budget-banner" role="status" aria-live="polite">
+    <span id="budget-banner-text"></span>
+    <a href="/cost" id="budget-banner-link">View cost</a>
+  </div>
   <!-- Dashboard Layout -->
   <div class="dashboard-layout">
     <!-- Sidebar -->
@@ -13609,6 +13652,57 @@ document.addEventListener('DOMContentLoaded', function() {
     document.getElementById('offline-banner').classList.add('show');
   }
+  // R3 budget banner: a persistent, page-wide indicator so a user running an
+  // overnight job sees the 80% budget warning WITHOUT opening the Cost panel.
+  // It reuses the existing WebSocket push (budget_status -> api:budget_status
+  // on the shared API client) and falls back to polling /api/cost/timeline.
+  (function initBudgetBanner() {
+    var banner = document.getElementById('budget-banner');
+    var textEl = document.getElementById('budget-banner-text');
+    if (!banner || !textEl) return;
+    function renderBudget(b) {
+      if (!b || (b.status !== 'warn' && b.status !== 'exceeded')) {
+        banner.classList.remove('show', 'warn', 'exceeded');
+        return;
+      }
+      // Honest copy: "Budget at 82% - hard stop at 100%."
+      var pct = (b.percent_used === null || b.percent_used === undefined)
+        ? null : Number(b.percent_used);
+      var pctTxt = (pct === null || !isFinite(pct)) ? '' : Math.round(pct) + '%';
+      var msg;
+      if (b.status === 'exceeded') {
+        msg = 'Budget cap reached' + (pctTxt ? ' (' + pctTxt + ')' : '') +
+              '. The run is paused to prevent a surprise bill.';
+      } else {
+        msg = 'Budget at ' + (pctTxt || 'over 80%') + ' - hard stop at 100%.';
+      }
+      textEl.textContent = msg;
+      banner.classList.remove('warn', 'exceeded');
+      banner.classList.add('show', b.status);
+    }
+    // Polling fallback (the WS push is best-effort; polling guarantees the
+    // banner is correct even on a freshly opened page or after a reconnect).
+    function poll() {
+      fetch('/api/cost/timeline', { headers: { 'Accept': 'application/json' } })
+        .then(function (r) { return r.ok ? r.json() : null; })
+        .then(function (d) { if (d && d.budget) renderBudget(d.budget); })
+        .catch(function () { /* offline / no endpoint: leave banner as-is */ });
+    }
+    poll();
+    setInterval(poll, 15000);
+    // Reuse the existing shared WebSocket client for the proactive push.
+    try {
+      var api = LokiDashboard.getApiClient({ baseUrl: window.location.origin });
+      api.addEventListener('api:budget_status', function (e) {
+        renderBudget(e && e.detail);
+      });
+      api.connect().catch(function () { /* polling fallback still covers it */ });
+    } catch (err) { /* polling fallback still covers it */ }
+  })();
   // Mobile menu toggle
   var mobileMenuBtn = document.getElementById('mobile-menu-btn');
   var sidebar = document.getElementById('sidebar');

package/docs/INSTALLATION.md CHANGED Viewed

@@ -2,7 +2,7 @@
 The flagship product of [Autonomi](https://www.autonomi.dev/). Complete installation instructions for all platforms and use cases.
-**Version:** v7.10.1
+**Version:** v7.12.0
 ---

package/docs/R3-COST-OBSERVABILITY-DESIGN.md ADDED Viewed

@@ -0,0 +1,147 @@
+# R3: Cost + Observability Dashboard (anti-surprise-cost wedge)
+Design note. Verified against live source on 2026-06-03 (v7.8.3 worktree).
+No version bumps, no commits to main. This file is a design artifact for the
+integrator; cherry-pick the implementation files listed at the bottom.
+## Goal
+Counter the #1 competitor churn driver (surprise cost) with TRANSPARENT cost:
+per-run and per-project cost USD over time, model-routing visibility, token
+burn, and budget caps that WARN before the cap (at 80%) rather than surprise
+the user, while preserving the existing hard-stop at 100%.
+## What already exists (reuse, do NOT duplicate)
+| Surface | Location | Has | Missing for R3 |
+|---|---|---|---|
+| Aggregate cost | `dashboard/server.py` `GET /api/cost` (~4391) | totals, by_phase, by_model, basic budget | per-run history, time-series, warn status |
+| Budget status | `dashboard/server.py` `GET /api/budget` (~4498) | limit, used, exceeded, remaining | warn-at-80% status field |
+| Pricing | `dashboard/server.py` `GET /api/pricing` (~4575) | model price table | -- |
+| Hard cap | `autonomy/run.sh` `check_budget_limit()` (8333) | pause + signal at >=100% | warn at 80% (no pause) |
+| Bun cap | `loki-ts/src/runner/budget.ts` `checkBudgetLimit()` | parity of hard cap | warn at 80% |
+| Cost lib | `autonomy/lib/efficiency_cost.py` `collect_efficiency` | sum cost_usd + tokens, honest None | (this is the shared lib to reuse) |
+| Per-run proof | `.loki/proofs/<run_id>/proof.json` via proof-generator.py | run_id, generated_at, cost.usd, files_changed.count, council.final_verdict, provider.model | (source for per-run history) |
+| Productivity CLI | `autonomy/loki` `cmd_metrics()` (17837) | session productivity report | dedicated cost view |
+| Estimate CLI | `autonomy/loki` `cmd_plan()` | pre-run cost ESTIMATE | actuals |
+## Critical data-source fact (verified)
+`autonomy/run.sh:3186` wipes `.loki/metrics/efficiency/iteration-*.json` at the
+start of every run. Therefore:
+- `.loki/metrics/efficiency/` only ever holds the CURRENT run's iterations. It
+  is the source for the INTRA-RUN time-series (per-iteration, now carries a
+  `timestamp` field, run.sh:4246).
+- Per-RUN and per-PROJECT cost OVER TIME must come from
+  `.loki/proofs/<run_id>/proof.json` (persistent, one dir per run, carries
+  `cost.usd` + `generated_at` + `provider.model`). This is the real
+  "cost over time" series. Using efficiency/ for it would silently show one run.
+## Deliverables
+### 1. New endpoint: `GET /api/cost/timeline` (dashboard/server.py)
+Read-only. Returns two honest series plus a budget block:
+```json
+{
+  "current_run": {
+    "iterations": [
+      {"iteration": 1, "timestamp": "...", "model": "sonnet",
+       "phase": "build", "input_tokens": 1500, "output_tokens": 500,
+       "cost_usd": 0.05, "cumulative_usd": 0.05}
+    ],
+    "total_usd": 0.05,
+    "cost_recorded": true
+  },
+  "runs": [
+    {"run_id": "...", "generated_at": "...", "model": "sonnet",
+     "cost_usd": 1.84, "files_changed": 3, "final_verdict": "APPROVE"}
+  ],
+  "project_total_usd": 1.89,
+  "runs_count": 1,
+  "budget": {
+    "limit": 50.0, "used": 1.89, "remaining": 48.11,
+    "percent_used": 3.78, "status": "ok",
+    "warn_threshold_percent": 80, "exceeded": false
+  }
+}
+```
+- `current_run.iterations` from `.loki/metrics/efficiency/iteration-*.json`,
+  sorted by iteration, with a running `cumulative_usd`. Cost per record:
+  prefer `cost_usd`; if null, price from tokens via the EXISTING
+  `_calculate_model_cost` helper (do not add a new pricer).
+- `runs` from `.loki/proofs/*/proof.json` (reuse `_proofs_dir` + `_safe_json_read`).
+- `project_total_usd` = sum of per-run proof costs (the persistent history).
+- `budget.status`: "ok" (<80%), "warn" (>=80% and <100%), "exceeded" (>=100%).
+  Computed at read time. No budget.json schema change (avoids the
+  byte-identical-JSON parity trap with run.sh heredoc / budget.ts).
+- `cost_recorded` distinguishes "recorded but $0" (records exist, sum 0.0) from
+  "not recorded" (no records) -- mirrors efficiency_cost.py honesty contract.
+`/api/cost` and `/api/budget` are left UNCHANGED (existing frontend + tests
+depend on them). The new endpoint is additive.
+### 2. Dashboard panel: `dashboard/static/cost.html`
+Self-contained, zero-build, all CSS+JS inlined (mirrors `proofs.html`). Fetches
+`/api/cost/timeline`. Shows: project total, budget gauge with a colored
+warn/exceeded state and an explicit "warns at 80%, hard-stops at 100%" caption,
+per-run history table, model-routing breakdown, and a simple inline-SVG
+cumulative-cost line for the current run. Linked from "/".
+### 3. CLI cost view: `loki cost`
+New `cmd_cost()` in `autonomy/loki` (no existing `cost` command -> free to add).
+Wired into dispatch + help. Reads the same two sources via a single embedded
+python3 block that imports `autonomy/lib/efficiency_cost.collect_efficiency`
+for the current-run aggregate (REUSE, not a 5th copy), and reads proofs/ for
+per-run history. Flags: `--json`, `--last N` (limit run history). Shows budget
+status with the 80% warn line. Honest: prints "cost not recorded for this run"
+when efficiency returns usd=None.
+`loki cost` is chosen over `loki metrics --cost` because cost is the headline
+R3 wedge and deserves a first-class verb; `loki metrics` stays a productivity
+report. Bun parity for `loki cost` is OUT of scope for this slice (documented
+gap; the bash route is canonical and the budget runtime warn below has Bun
+parity which is the load-bearing part).
+### 4. Budget warn-at-80% (runtime, both routes)
+Add a non-pausing warn when crossing 80%, keep the 100% pause:
+- `autonomy/run.sh` `check_budget_limit()`: when `0.80*limit <= cost < limit`,
+  `log_warn` + `emit_event_json budget_warning`. Does NOT pause.
+- `loki-ts/src/runner/budget.ts` `checkBudgetLimit()`: same warn semantics via
+  the returned result (add `warn: boolean` to `CheckBudgetResult`); orchestrator
+  logs it. No budget.json schema change.
+## Tests
+- `tests/dashboard/test_cost_timeline_endpoint.py` (pytest, `_ForceLokiDir`
+  pattern): empty dirs -> 200 with honest nulls; current-run aggregation +
+  cumulative; per-run history from proofs; budget status thresholds
+  (ok/warn/exceeded) at 79/80/100%; recorded-but-zero vs not-recorded; corrupt
+  JSON skipped; no-PII (no absolute paths leaked).
+- `loki-ts/tests/runner/budget.test.ts` (extended, bun test): warn flag true in
+  [80%,100%), false below 80% and at/above 100% (exceeded path), no pause file
+  written on warn.
+## No-PII / honesty constraints
+- Endpoints return only aggregates + run_ids + model names + timestamps. No file
+  paths, no prompt text, no token strings. proof.json is already redacted by the
+  R1 generator before it lands.
+- `$0.00` is never fabricated: uncollected cost surfaces as null / "not recorded".
+## Files (for the integrator to cherry-pick)
+- `dashboard/server.py` (add `/api/cost/timeline`)
+- `dashboard/static/cost.html` (new)
+- `autonomy/loki` (add `cmd_cost` + dispatch + help)
+- `autonomy/run.sh` (warn-at-80% in `check_budget_limit`)
+- `loki-ts/src/runner/budget.ts` (warn flag)
+- `tests/dashboard/test_cost_timeline_endpoint.py` (new)
+- `loki-ts/tests/runner/budget.test.ts` (extended: warn-at-80% describe block)
+- `docs/R3-COST-OBSERVABILITY-DESIGN.md` (this file)

package/docs/R7-ZERO-CONFIG-FIRST-RUN-PLAN.md ADDED Viewed

@@ -0,0 +1,137 @@
+# R7: Zero-config killer first run (time-to-first-value)
+Design note for the R7 release in the competitive-stickiness arc. Worktree
+deliverable for the integrator to cherry-pick. NO version bumps here.
+## Goal
+Convert trials to habits. The #1 acquisition-to-retention gate is the first
+run. Today a blank first run is mediocre and Loki's deep RARV-C / council can
+feel heavy on run 1. R7 = a frictionless first run: a user types
+`loki start "<one line>"` (or `loki start` in an existing repo) and sees a
+VISIBLE valuable artifact in minutes, with depth opt-in later.
+Honest "fast": we do NOT fake progress. We actually shorten the path by running
+a lightweight execution profile first (capped iterations, completion council
+off, simple complexity tier, heavy phases off) so the first visible artifact
+plus a proof-of-run land quickly. "Go deeper" = re-run plain `loki start` for
+the full RARV-C depth.
+## Verified current behavior (real code, traced 2026-06-03)
+- `cmd_start()` (`autonomy/loki:746`) is the unified entry. It parses args,
+  calls `detect_arg_type()` (`autonomy/loki:667`), then dispatches:
+  issue -> `cmd_run`; prd -> sets `prd_file`; empty -> no-PRD path; unknown
+  -> treated as a PRD path for back-compat.
+- `cmd_start` ends in `_loki_new_session_exec "$RUN_SH" ...` (`autonomy/loki:1678`).
+  Every branch of `_loki_new_session_exec` (`autonomy/loki:167-186`) uses
+  `exec`, so NOTHING after that line in `cmd_start` runs. Any end-of-run
+  message must live in `run.sh`, not after the exec in `cmd_start`.
+- `cmd_quick()` (`autonomy/loki:8849`) already synthesizes a PRD from a
+  one-line task and sets the lightweight profile
+  (`LOKI_MAX_ITERATIONS=3`, `LOKI_COMPLEXITY=simple`,
+  `LOKI_COUNCIL_ENABLED=false`, heavy phases off), then execs `run.sh`.
+- No-PRD + generated-PRD-reuse (v7.8.1): in `run.sh` around line 11102,
+  `decide_generated_prd_action()` (`run.sh:4032`) returns reuse|update|generate
+  for the no-arg in-repo path; signature persisted by
+  `persist_prd_signature_if_present()` (`run.sh:4064`).
+- Proof-of-run (R1): `generate_proof_of_run()` (`run.sh:4101`) wraps
+  `autonomy/lib/proof-generator.py`. It runs at session end (`run.sh:13312`)
+  on both success and failure, gated only by `LOKI_PROOF` (NOT by council
+  state), writing `.loki/proofs/<run_id>/{proof.json,index.html}`. Viewable
+  via `loki proof list` / `loki proof open <id>` (Bun-routed, `bin/loki:119`).
+### The exact gap R7 closes (traced, not assumed)
+`loki start "build a todo app"` TODAY:
+1. `detect_arg_type("build a todo app")` returns `unknown` (has spaces, no
+   extension, not a file, not an issue ref).
+2. The PRD-not-found guard at `autonomy/loki:1243` and `:1268` only fires for
+   `*.md|*.json|*.txt|*.yaml|*.yml`, so a brief with spaces slips past.
+3. `prd_file="build a todo app"` is passed to `run.sh`, which fails:
+   `[ERROR] PRD file not found: build a todo app`.
+So the one-line-brief path is broken today. R7 makes it work. This is ADDITIVE:
+no existing valid input (`.md` PRD, issue ref, single-token name) changes
+behavior.
+## Design (additive, no behavior change to existing inputs)
+1. `detect_arg_type`: add a `brief` return ONLY for args that contain
+   whitespace and match none of the file/issue/path patterns. A single-token
+   `unknown` arg still falls back to PRD path (back-compat preserved).
+2. `--brief "<text>"` explicit flag: deterministic escape hatch for the rare
+   single-word brief (e.g. `loki start --brief "snake"`).
+3. Shared helper `synthesize_brief_prd <file> <text>`: factored so `cmd_quick`
+   and the new brief path write the same forward-looking PRD. The brief PRD is
+   written to `.loki/brief-prd-$$.md` -- DISTINCT from `.loki/generated-prd.md`
+   so it never pollutes the v7.8.1 generated-PRD-reuse signature logic
+   (generated-prd is for codebase analysis of an existing repo; brief is a
+   forward spec).
+4. `cmd_start` brief sub-path: set the lightweight TTFV profile (same env as
+   quick), synthesize the brief PRD, set `LOKI_TTFV=brief`, then continue
+   through the normal exec path. Upfront framing ("fast first pass") is printed
+   BEFORE the exec.
+5. `cmd_start` no-arg in-repo path: UNCHANGED execution (existing no-PRD +
+   reuse, full RARV-C depth), but set `LOKI_TTFV=repo` so the end-of-run
+   what-next framing appears.
+6. `run.sh` end-of-session: after proof generation, when `LOKI_TTFV` is set and
+   stdout is a TTY, call `print_ttfv_next_steps <mode> <result>`. The wording
+   BRANCHES on mode so the message always matches what actually ran:
+   - `brief`: lightweight first pass, council off; proof has diffs/cost/time
+     (NO council verdicts, because the council was disabled).
+   - `repo`: full-depth codebase analysis, council on; proof has
+     diffs/cost/time/council verdicts.
+   Both point at `loki proof list` / `loki proof open` (the visible artifact)
+   and the depth opt-in. Gated so it is silent in CI / pipes and never fires
+   for normal PRD runs. Factored into `print_ttfv_next_steps` so it is
+   unit-testable.
+Honesty note: the `brief` message intentionally does NOT advertise "council
+verdicts" because brief mode runs with the council off (`_collect_council` in
+proof-generator.py finds no council state, so that proof section is blank on the
+brief path). The `repo` message claims verdicts because the full-depth path runs
+the council. This keeps the end-of-run summary truthful per the no-fabrication
+rule.
+### Why fast is honest
+The brief path uses the same lightweight profile `cmd_quick` already ships:
+3 iterations max, council off, simple tier, heavy phases (perf, a11y,
+regression, UAT, web-research) off. That genuinely shortens the path to first
+visible value. We do not print fake progress or claim work that did not happen;
+the proof-of-run is generated from real `.loki/` state. Depth is opt-in: the
+end-of-run message tells the user to re-run plain `loki start` (or
+`loki start <prd.md>`) for the full council-gated build.
+## Parity (bash + Bun)
+`loki start` and `loki quick` are NOT in the Bun shim allowlist
+(`bin/loki:119`), so dispatch is bash-only by design; this change is bash-only
+for the CLI surface. The runtime pieces it reuses are already shared across
+routes: `proof-generator.py` (one implementation, both routes) and the no-PRD /
+generated-PRD-reuse path in `run.sh` (both routes source run.sh). No Bun CLI
+change is required for parity.
+## Files
+- `autonomy/loki`: `detect_arg_type` brief return; `--brief` flag;
+  `synthesize_brief_prd` helper; `cmd_quick` refactor to use it; `cmd_start`
+  brief sub-path + `LOKI_TTFV` wiring; help text.
+- `autonomy/run.sh`: end-of-session TTFV what-next block.
+- `tests/cli/test_zero_config_first_run.sh`: new test suite.
+## Tests (no paid runs; mock via early exit)
+Following `tests/cli/test_start_run_unified.sh`: extract `detect_arg_type` and
+`synthesize_brief_prd` in a subshell and assert on them; force `cmd_start` to
+exit before `run.sh` boots via `--provider nonexistent-provider`.
+- `detect_arg_type("build a todo app")` = `brief`; single tokens still `unknown`;
+  `.md` still `prd`; issue refs still `issue`; empty still `empty`.
+- `synthesize_brief_prd` writes a PRD containing the brief text and TTFV markers.
+- `loki start "<brief>"` enters the brief path (lightweight env, not
+  "PRD file not found").
+- `loki start --brief "<one word>"` works.
+- existing-repo no-arg path still routes to no-PRD (unchanged).
+- `loki start <prd.md>` (real PRD) still routes to PRD mode (no regression).