loki-mode 7.15.0 → 7.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -460,6 +460,7 @@ async def _push_loki_state_loop() -> None:
460
460
  last_mtime: float = 0.0
461
461
  _last_skill_hash: str = "" # Track skill-session state changes
462
462
  _last_budget_status: str = "" # Track budget-status transitions (R3)
463
+ _last_trust_signature: str = "" # Track trust-trajectory changes (R4)
463
464
  while True:
464
465
  try:
465
466
  if not manager.active_connections:
@@ -490,6 +491,30 @@ async def _push_loki_state_loop() -> None:
490
491
  except (OSError, ValueError, KeyError):
491
492
  pass
492
493
 
494
+ # R4 visible trust trajectory: proactively push a trust_update when
495
+ # the trajectory's improving/regressing tally changes (e.g. a new
496
+ # run just landed a council pass), so an open dashboard reflects the
497
+ # earned-autonomy trend without a manual refresh. Mirrors the R3
498
+ # budget_status transition push; reuses manager.broadcast (no second
499
+ # channel). Signature gates the push so we only broadcast on change.
500
+ try:
501
+ _tmod = _load_trust_module()
502
+ if _tmod is not None:
503
+ _traj = _tmod.compute_trajectory(str(loki_dir))
504
+ _sig = "%d:%d:%d" % (
505
+ _traj.get("runs_count", 0),
506
+ _traj.get("improving_count", 0),
507
+ _traj.get("regressing_count", 0),
508
+ )
509
+ if _sig != _last_trust_signature:
510
+ await manager.broadcast({
511
+ "type": "trust_update",
512
+ "data": _traj,
513
+ })
514
+ _last_trust_signature = _sig
515
+ except (OSError, ValueError, KeyError):
516
+ pass
517
+
493
518
  _broadcast_sent = False
494
519
 
495
520
  if state_file.exists():
@@ -4780,6 +4805,81 @@ async def get_cost_timeline():
4780
4805
  }
4781
4806
 
4782
4807
 
4808
+ # =============================================================================
4809
+ # Trust trajectory API (R4): is the agent earning autonomy on THIS repo?
4810
+ # =============================================================================
4811
+
4812
_TRUST_MODULE = None  # cached import of autonomy/lib/trust_trajectory.py


def _load_trust_module():
    """Import the shared trust-trajectory derivation (single source of truth).

    The derivation lives in autonomy/lib/trust_trajectory.py so the dashboard
    endpoint, the bash `cmd_trust`, and the test suite all agree. Loaded via
    importlib because autonomy/lib is not an importable package. Cached after
    the first successful load; failures are not cached, so a later call
    retries.

    The module is registered in ``sys.modules`` under its spec name before it
    is executed, as the importlib "importing a source file directly" recipe
    does. Code that resolves its defining module through ``sys.modules``
    (e.g. ``dataclasses`` with string annotations, ``pickle``) otherwise
    fails for a module loaded this way. If execution raises, the previous
    ``sys.modules`` entry (or its absence) is restored.

    Returns:
        The loaded module, or None if the file is missing or cannot be
        imported (degraded mode).
    """
    global _TRUST_MODULE
    if _TRUST_MODULE is not None:
        return _TRUST_MODULE
    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    mod_path = os.path.join(repo_root, "autonomy", "lib", "trust_trajectory.py")
    if not os.path.isfile(mod_path):
        return None
    try:
        import importlib.util as _ilu
        import sys as _sys

        spec = _ilu.spec_from_file_location("trust_trajectory", mod_path)
        if spec is None or spec.loader is None:
            return None
        mod = _ilu.module_from_spec(spec)
    except Exception:
        # Best-effort: any failure building the spec/module means degraded mode.
        return None

    # Register before executing so the module can find itself in sys.modules
    # while its top-level code runs; remember what was there to undo on error.
    _absent = object()
    previous = _sys.modules.get(spec.name, _absent)
    _sys.modules[spec.name] = mod
    try:
        spec.loader.exec_module(mod)
    except Exception:
        # Do not leave a half-initialised module importable by name.
        if previous is _absent:
            _sys.modules.pop(spec.name, None)
        else:
            _sys.modules[spec.name] = previous
        return None
    _TRUST_MODULE = mod
    return mod
4841
+
4842
+
4843
@app.get("/api/trust/trajectory")
async def get_trust_trajectory():
    """Return the per-project trust trajectory as JSON.

    Delegates to the shared derivation module's ``compute_trajectory`` for the
    current Loki directory. Per that module's contract, the result is built
    from the persisted per-run proof records (.loki/proofs/<run_id>/proof.json)
    and reports, per axis, a direction and an `improving` flag; with fewer
    than 2 recorded runs it reports insufficient=True rather than a direction.

    When the derivation module cannot be loaded, a fixed degraded payload with
    available=False is returned instead. Otherwise the trajectory is also
    written to the derivation's cache on a best-effort basis (write failures
    are ignored) and returned with available=True.
    """
    loki_dir = _get_loki_dir()
    trust = _load_trust_module()
    if trust is None:
        # Degraded mode: same top-level keys a client expects, but flagged.
        return dict(
            schema_version=1,
            available=False,
            error="trust_trajectory module not found",
            runs_count=0,
            insufficient=True,
            axes={},
            series=[],
            notes=["trust derivation module unavailable in this install"],
        )

    trajectory = trust.compute_trajectory(str(loki_dir))
    # Best-effort cache write so other surfaces share one source of truth.
    try:
        trust.write_trajectory_cache(str(loki_dir), trajectory)
    except Exception:
        pass
    # Flag is added after the cache write, so it is response-only.
    trajectory["available"] = True
    return trajectory
4881
+
4882
+
4783
4883
  # =============================================================================
4784
4884
  # Pricing API
4785
4885
  # =============================================================================
@@ -6764,6 +6864,18 @@ async def serve_cost_panel():
6764
6864
  return Response(status_code=404)
6765
6865
 
6766
6866
 
6867
+ # R4: standalone trust-trajectory page that fetches /api/trust/trajectory.
6868
+ # Mirrors the cost.html / /cost pattern: works without the SPA build.
6869
@app.get("/trust", include_in_schema=False)
async def serve_trust_panel():
    """Serve the standalone trust-trajectory HTML panel.

    Responds with trust.html from STATIC_DIR when that directory is configured
    and the file exists; otherwise responds with an empty 404.
    """
    if not STATIC_DIR:
        return Response(status_code=404)
    panel_file = os.path.join(STATIC_DIR, "trust.html")
    if not os.path.isfile(panel_file):
        return Response(status_code=404)
    return FileResponse(panel_file, media_type="text/html")
6877
+
6878
+
6767
6879
  # Serve index.html or standalone HTML for root
6768
6880
  @app.get("/", include_in_schema=False)
6769
6881
  async def serve_index():
@@ -679,6 +679,10 @@
679
679
  <svg viewBox="0 0 24 24"><line x1="12" y1="1" x2="12" y2="23"/><path d="M17 5H9.5a3.5 3.5 0 000 7h5a3.5 3.5 0 010 7H6"/></svg>
680
680
  Cost
681
681
  </button>
682
+ <button class="nav-link" data-section="trust" id="nav-trust">
683
+ <svg viewBox="0 0 24 24"><polyline points="3 17 9 11 13 15 21 7" fill="none" stroke="currentColor" stroke-width="2"/><polyline points="15 7 21 7 21 13" fill="none" stroke="currentColor" stroke-width="2"/></svg>
684
+ Trust
685
+ </button>
682
686
  <button class="nav-link" data-section="checkpoint" id="nav-checkpoint">
683
687
  <svg viewBox="0 0 24 24"><path d="M19 21H5a2 2 0 01-2-2V5a2 2 0 012-2h11l5 5v11a2 2 0 01-2 2z"/><polyline points="17 21 17 13 7 13 7 21"/><polyline points="7 3 7 8 15 8"/></svg>
684
688
  Checkpoints
@@ -1109,6 +1113,17 @@
1109
1113
  <loki-cost-dashboard id="cost-dashboard"></loki-cost-dashboard>
1110
1114
  </div>
1111
1115
 
1116
+ <!-- Trust Trajectory (R4): embeds the standalone /trust panel so the SPA
1117
+ and the build-free page share one renderer + one /api/trust/trajectory
1118
+ source. Mirrors the cost panel wiring. -->
1119
+ <div class="section-page" id="page-trust">
1120
+ <div class="section-page-header">
1121
+ <h2 class="section-page-title">Trust Trajectory</h2>
1122
+ </div>
1123
+ <iframe id="trust-frame" title="Trust trajectory" src="about:blank"
1124
+ style="width:100%;height:calc(100vh - 160px);border:0;border-radius:8px;background:#0f1115;"></iframe>
1125
+ </div>
1126
+
1112
1127
  <!-- Checkpoints -->
1113
1128
  <div class="section-page" id="page-checkpoint">
1114
1129
  <div class="section-page-header">
@@ -13808,6 +13823,15 @@ document.addEventListener('DOMContentLoaded', function() {
13808
13823
  if (pageEl) {
13809
13824
  pageEl.classList.add('active');
13810
13825
  }
13826
+ // R4: lazy-load the trust panel iframe on first open (avoids fetching
13827
+ // /trust on page loads where the user never opens this section).
13828
+ if (sectionId === 'trust') {
13829
+ var tframe = document.getElementById('trust-frame');
13830
+ if (tframe && (!tframe.src || tframe.src === 'about:blank' ||
13831
+ tframe.getAttribute('src') === 'about:blank')) {
13832
+ tframe.src = '/trust';
13833
+ }
13834
+ }
13811
13835
  // Update nav active state
13812
13836
  navLinks.forEach(function(link) { link.classList.remove('active'); });
13813
13837
  var navEl = document.querySelector('.nav-link[data-section="' + sectionId + '"]');
@@ -13834,7 +13858,7 @@ document.addEventListener('DOMContentLoaded', function() {
13834
13858
  document.addEventListener('keydown', function(e) {
13835
13859
  if ((e.metaKey || e.ctrlKey) && ((e.key >= '1' && e.key <= '9') || e.key === '0')) {
13836
13860
  e.preventDefault();
13837
- var sections = ['overview', 'insights', 'prd-checklist', 'app-runner', 'council', 'quality', 'cost', 'checkpoint', 'context', 'notifications', 'migration', 'analytics', 'escalations'];
13861
+ var sections = ['overview', 'insights', 'prd-checklist', 'app-runner', 'council', 'quality', 'cost', 'trust', 'checkpoint', 'context', 'notifications', 'migration', 'analytics', 'escalations'];
13838
13862
  var idx = e.key === '0' ? 9 : parseInt(e.key) - 1;
13839
13863
  if (idx < sections.length) switchSection(sections[idx]);
13840
13864
  }
@@ -13875,7 +13899,7 @@ document.addEventListener('DOMContentLoaded', function() {
13875
13899
  // Skip if modifier keys are held (let browser defaults work)
13876
13900
  if (e.metaKey || e.ctrlKey || e.altKey) return;
13877
13901
 
13878
- var sections = ['overview', 'insights', 'prd-checklist', 'app-runner', 'council', 'quality', 'cost', 'checkpoint', 'context', 'notifications', 'migration', 'analytics', 'escalations'];
13902
+ var sections = ['overview', 'insights', 'prd-checklist', 'app-runner', 'council', 'quality', 'cost', 'trust', 'checkpoint', 'context', 'notifications', 'migration', 'analytics', 'escalations'];
13879
13903
 
13880
13904
  switch (e.key) {
13881
13905
  // Section navigation: 1-9, 0
@@ -0,0 +1,271 @@
1
+ <!DOCTYPE html>
2
+ <!--
3
+ Loki Mode - Trust trajectory panel (R4, zero-build standalone).
4
+
5
+ Self-contained: all CSS + JS inlined, no external resources. Fetches
6
+ /api/trust/trajectory and renders, per project over runs/time, whether the
7
+ agent is EARNING autonomy on THIS repo: council pass-rate, gate pass-rate,
8
+ iterations-to-completion, and (when recorded) human interventions, each with
9
+ an up/down/flat direction and an inline-SVG sparkline.
10
+
11
+ The story no competitor tells. Honest-data rule: with fewer than 2 runs this
12
+ shows "not enough history yet", never a fabricated trend.
13
+ -->
14
+ <html lang="en">
15
+ <head>
16
+ <meta charset="utf-8">
17
+ <meta name="viewport" content="width=device-width, initial-scale=1">
18
+ <title>Loki Mode - Trust Trajectory</title>
19
+ <style>
20
+ :root {
21
+ --bg: #0f1115; --panel: #171a21; --panel-2: #1d2129; --border: #2a2f3a;
22
+ --text: #e7e9ee; --muted: #9aa1ad; --faint: #6b7280; --accent: #6f7bf7;
23
+ --green: #34d399; --red: #f87171; --amber: #fbbf24;
24
+ --mono: ui-monospace, "SF Mono", "Menlo", "Consolas", monospace;
25
+ --sans: 'Inter', system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
26
+ }
27
+ * { box-sizing: border-box; }
28
+ body { margin: 0; background: var(--bg); color: var(--text); font-family: var(--sans); line-height: 1.5; }
29
+ a { color: var(--accent); text-decoration: none; }
30
+ a:hover { text-decoration: underline; }
31
+ .wrap { max-width: 960px; margin: 0 auto; padding: 40px 20px 80px; }
32
+ .head { display: flex; align-items: baseline; justify-content: space-between; margin-bottom: 8px; }
33
+ h1 { font-size: 24px; font-weight: 650; letter-spacing: -0.3px; margin: 0; }
34
+ h2 { font-size: 15px; font-weight: 600; color: var(--muted); margin: 30px 0 12px; text-transform: uppercase; letter-spacing: 0.5px; }
35
+ .head a { font-size: 13px; }
36
+ .sub { color: var(--muted); font-size: 14px; margin: 0 0 26px; }
37
+ .cards { display: flex; gap: 14px; flex-wrap: wrap; }
38
+ .card { flex: 1 1 200px; background: var(--panel); border: 1px solid var(--border); border-radius: 12px; padding: 16px 18px; }
39
+ .card .label { color: var(--muted); font-size: 12px; text-transform: uppercase; letter-spacing: 0.5px; }
40
+ .card .val { font-family: var(--mono); font-size: 26px; font-weight: 650; margin-top: 6px; }
41
+ .card .note { color: var(--faint); font-size: 12px; margin-top: 4px; }
42
+ .axes { display: flex; flex-direction: column; gap: 12px; }
43
+ .axis { background: var(--panel); border: 1px solid var(--border); border-radius: 12px; padding: 16px 18px; display: flex; align-items: center; gap: 16px; }
44
+ .axis .meta { flex: 1 1 auto; min-width: 0; }
45
+ .axis .name { font-size: 14px; font-weight: 600; }
46
+ .axis .desc { color: var(--faint); font-size: 12px; margin-top: 2px; }
47
+ .axis .spark { flex: 0 0 200px; }
48
+ .axis .verdict { flex: 0 0 150px; text-align: right; }
49
+ .axis .dir { font-family: var(--mono); font-size: 14px; font-weight: 650; }
50
+ .axis .tag { font-size: 12px; margin-top: 2px; }
51
+ .dir.up { color: var(--green); }
52
+ .dir.down { color: var(--green); }
53
+ .dir.bad { color: var(--red); }
54
+ .dir.flat { color: var(--muted); }
55
+ .tag.good { color: var(--green); }
56
+ .tag.bad { color: var(--red); }
57
+ .tag.flat { color: var(--muted); }
58
+ .tag.na { color: var(--faint); }
59
+ svg { display: block; width: 100%; height: 40px; }
60
+ table { width: 100%; border-collapse: collapse; font-size: 13px; }
61
+ th, td { text-align: left; padding: 8px 10px; border-bottom: 1px solid var(--border); }
62
+ th { color: var(--muted); font-weight: 600; font-size: 12px; text-transform: uppercase; letter-spacing: 0.4px; }
63
+ td.num, th.num { text-align: right; font-family: var(--mono); }
64
+ .badge { font-size: 12px; font-weight: 600; padding: 2px 8px; border-radius: 6px; border: 1px solid var(--border); }
65
+ .b-approve { color: var(--green); border-color: rgba(52,211,153,0.4); }
66
+ .b-reject { color: var(--red); border-color: rgba(248,113,113,0.4); }
67
+ .empty { color: var(--muted); background: var(--panel); border: 1px solid var(--border); border-radius: 12px; padding: 24px; text-align: center; }
68
+ .empty code { font-family: var(--mono); color: var(--text); background: var(--panel-2); padding: 2px 6px; border-radius: 5px; }
69
+ .headline { background: var(--panel); border: 1px solid var(--border); border-radius: 12px; padding: 18px; font-size: 15px; }
70
+ .headline.good { border-color: rgba(52,211,153,0.4); }
71
+ .headline.bad { border-color: rgba(248,113,113,0.4); }
72
+ .mono { font-family: var(--mono); }
73
+ .muted { color: var(--muted); }
74
+ </style>
75
+ </head>
76
+ <body>
77
+ <div class="wrap">
78
+ <div class="head">
79
+ <h1>Trust Trajectory</h1>
80
+ <a href="/">Back to dashboard</a>
81
+ </div>
82
+ <p class="sub">Is the agent earning autonomy on THIS repo? Council pass-rate, gate pass-rate, iterations-to-completion, and human interventions over your run history. Real council and RARV-C data, never a fabricated trend.</p>
83
+ <div id="content"><p class="sub">Loading...</p></div>
84
+ </div>
85
+ <script>
86
+ (function () {
87
+ "use strict";
88
// Escape a value for interpolation into HTML text or attribute values.
// null/undefined become the empty string; anything else is stringified first.
function esc(s) {
  var ENTITIES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  var text = (s === null || s === undefined) ? "" : String(s);
  // Single pass over the five special characters; no risk of double-escaping.
  return text.replace(/[&<>"']/g, function (ch) { return ENTITIES[ch]; });
}
93
// Format a 0..1 ratio as a whole-number percentage string; "n/a" when the
// value is null/undefined or does not convert to a finite number.
function pct(n) {
  if (n === null || n === undefined) return "n/a";
  var ratio = Number(n);
  return isFinite(ratio) ? (ratio * 100).toFixed(0) + "%" : "n/a";
}
99
// Format a number rounded to at most two decimals; "n/a" when the value is
// null/undefined or does not convert to a finite number.
function num(n) {
  if (n === null || n === undefined) return "n/a";
  var value = Number(n);
  return isFinite(value) ? String(Math.round(value * 100) / 100) : "n/a";
}
105
// Map a verdict string (case-insensitive) to a badge CSS class:
// approve-like verdicts -> "b-approve", reject-like -> "b-reject", else "".
function badgeClass(v) {
  var verdict = String(v || "").toUpperCase();
  function begins(prefix) { return verdict.slice(0, prefix.length) === prefix; }

  var approved = begins("APPROVE") || begins("COMPLETE") ||
    verdict === "PASS" || verdict === "PASSED";
  if (approved) return "b-approve";

  var rejected = begins("REJECT") || begins("BLOCK") || verdict === "FAIL";
  return rejected ? "b-reject" : "";
}
111
// ASCII direction glyphs (deliberately no emoji): "^" for up, "v" for down,
// "-" for flat or any other value.
function arrow(direction) {
  switch (direction) {
    case "up":
      return "^";
    case "down":
      return "v";
    default:
      return "-";
  }
}
117
// Build an inline-SVG sparkline from a numeric series.
//
// values:         array of per-run values; null/undefined/non-finite entries
//                 are skipped, so the line connects only real data points.
// higherIsBetter: axis polarity, used to colour the line green when the last
//                 point is better than the first and red when it is worse.
//
// Returns an HTML string: a "no data" span when nothing is plottable, a dot
// when exactly one point is plottable (a one-point polyline draws nothing),
// otherwise a polyline. A constant series is drawn at mid-height rather than
// pinned to the bottom edge, where it would read as a zero value.
function sparkline(values, higherIsBetter) {
  var pts = [];
  for (var i = 0; i < values.length; i++) {
    var v = values[i];
    if (v === null || v === undefined || !isFinite(Number(v))) continue;
    pts.push(Number(v));
  }
  if (pts.length === 0) return '<span class="muted" style="font-size:12px;">no data</span>';

  var W = 200, H = 40, pad = 4;
  var min = Math.min.apply(null, pts), max = Math.max.apply(null, pts);
  var range = max - min;
  function yFor(value) {
    if (range === 0) return H / 2; // no spread: centre the mark vertically
    return H - pad - ((value - min) / range) * (H - 2 * pad);
  }

  var svgOpen = '<svg viewBox="0 0 ' + W + ' ' + H + '" preserveAspectRatio="none">';
  var stroke = "#9aa1ad";

  if (pts.length === 1) {
    // Neutral colour: one point carries no trend.
    return svgOpen +
      '<circle cx="' + (W / 2).toFixed(1) + '" cy="' + yFor(pts[0]).toFixed(1) +
      '" r="3" fill="' + stroke + '"/>' +
      '</svg>';
  }

  var coords = [];
  for (var j = 0; j < pts.length; j++) {
    var x = pad + (j / (pts.length - 1)) * (W - 2 * pad);
    coords.push(x.toFixed(1) + "," + yFor(pts[j]).toFixed(1));
  }

  // Color the line by whether the last value is better than the first.
  var first = pts[0], last = pts[pts.length - 1];
  var rising = last > first;
  var falling = last < first;
  if ((rising && higherIsBetter) || (falling && !higherIsBetter)) stroke = "#34d399";
  else if ((rising && !higherIsBetter) || (falling && higherIsBetter)) stroke = "#f87171";

  return svgOpen +
    '<polyline fill="none" stroke="' + stroke + '" stroke-width="2" points="' + coords.join(" ") + '"/>' +
    '</svg>';
}
148
+
149
// Human-readable descriptions for the known trajectory axes, keyed by the
// axis name used in the API response.
var AXIS_DESC = {
  council_pass_rate: "Share of runs the 3-reviewer council approved",
  gate_pass_rate: "Share of quality gates passed per run",
  iterations: "RARV iterations needed to complete a run",
  interventions: "Human interventions needed per run"
};

// Render one axis row: name + description, sparkline, and verdict.
//
// key:    axis name (also the property read from each series entry).
// ax:     axis summary object; reads label, higher_is_better, available,
//         insufficient, direction, improving and latest.
// series: per-run entries; series[i][key] feeds the sparkline.
//
// A "flat" direction is always labelled "stable": `improving` only says
// whether a movement is the good one, so a false value on a flat axis must
// not be shown as "regressing" next to a flat indicator.
function renderAxis(key, ax, series) {
  var label = ax.label || key;
  var desc = AXIS_DESC[key] || "";
  var values = series.map(function (s) { return s[key]; });
  var spark = sparkline(values, !!ax.higher_is_better);
  var verdict;
  if (!ax.available) {
    verdict = '<div class="dir flat">-</div><div class="tag na">no data</div>';
  } else if (ax.insufficient) {
    verdict = '<div class="dir flat">-</div><div class="tag na">need 2+ runs</div>';
  } else {
    var dir = ax.direction || "flat";
    var isFlat = dir === "flat";
    var improving = !isFlat && ax.improving === true;
    var regressing = !isFlat && ax.improving === false;
    // Both "up" and "down" are styled green when improving; "bad" is red.
    var dirClass = isFlat ? "flat" : (ax.improving ? (dir === "up" ? "up" : "down") : "bad");
    var tagClass = improving ? "good" : (regressing ? "bad" : "flat");
    var tagText = improving ? "improving" : (regressing ? "regressing" : "stable");
    // Higher-is-better axes are rates (shown as %); the others are counts.
    var latestStr = ax.higher_is_better ? pct(ax.latest) : num(ax.latest);
    verdict = '<div class="dir ' + dirClass + '">' + arrow(dir) + ' ' + esc(dir) + '</div>' +
      '<div class="tag ' + tagClass + '">' + tagText + ' (now ' + esc(latestStr) + ')</div>';
  }
  return '<div class="axis">' +
    '<div class="meta"><div class="name">' + esc(label) + '</div>' +
    '<div class="desc">' + esc(desc) + (ax.higher_is_better ? " (higher is better)" : " (lower is better)") + '</div></div>' +
    '<div class="spark">' + spark + '</div>' +
    '<div class="verdict">' + verdict + '</div>' +
    '</div>';
}
182
+
183
// Render the "Run history" section: an empty-state card when there are no
// runs, otherwise a table with one row per series entry (run id, timestamp,
// council verdict, gate pass-rate, iterations, interventions).
function renderRuns(series) {
  var heading = '<h2>Run history</h2>';
  if (!series || series.length === 0) {
    return heading +
      '<div class="empty">No completed runs yet. Trust trajectory comes' +
      ' from proof-of-run artifacts (<code>.loki/proofs/</code>), written at' +
      ' the end of each run.</div>';
  }

  function isMissing(x) { return x === null || x === undefined; }

  var rowParts = [];
  for (var r = 0; r < series.length; r++) {
    var run = series[r];
    var rate = run.council_pass_rate;
    var council;
    // Only an exact 1 or 0 is shown as a badge; anything else is a dash.
    if (rate === 1) council = '<span class="badge b-approve">PASS</span>';
    else if (rate === 0) council = '<span class="badge b-reject">FAIL</span>';
    else council = '<span class="muted">-</span>';

    var gates = isMissing(run.gate_pass_rate) ? "-" : pct(run.gate_pass_rate);
    var iters = isMissing(run.iterations) ? "-" : esc(run.iterations);
    var interv = isMissing(run.interventions) ? "-" : esc(run.interventions);

    rowParts.push(
      '<tr><td class="mono">' + esc(run.run_id) + '</td>' +
      '<td class="muted">' + esc(run.generated_at || "") + '</td>' +
      '<td>' + council + '</td>' +
      '<td class="num">' + gates + '</td>' +
      '<td class="num">' + iters + '</td>' +
      '<td class="num">' + interv + '</td></tr>'
    );
  }

  return heading +
    '<table><thead><tr>' +
    '<th>Run</th><th>When</th><th>Council</th><th class="num">Gates</th>' +
    '<th class="num">Iters</th><th class="num">Interv.</th>' +
    '</tr></thead><tbody>' + rowParts.join("") + '</tbody></table>';
}
210
+
211
// Render the /api/trust/trajectory payload into #content.
//
// d: the parsed response. A null/undefined payload is treated as an empty
//    object so the function never throws on a missing body.
//
// Three outcomes:
//   - d.available === false: "unavailable in this install" card.
//   - d.insufficient:        "not enough history" headline plus run table.
//   - otherwise:             summary cards, headline, per-axis rows, run table.
function render(d) {
  d = d || {};
  var c = document.getElementById("content");
  if (d.available === false) {
    c.innerHTML = '<div class="empty">Trust trajectory is unavailable in this' +
      ' install. ' + esc((d.notes && d.notes[0]) || "") + '</div>';
    return;
  }
  var series = d.series || [];
  if (d.insufficient) {
    var msg = '<div class="headline"><strong>Not enough history yet.</strong><br>' +
      'Trust trajectory needs 2 or more recorded runs to show a direction. ' +
      esc(d.runs_count || 0) + ' run(s) recorded so far. Run <code>loki start</code> again' +
      ' and the trend appears here, derived from real council and gate results.</div>';
    c.innerHTML = msg + renderRuns(series);
    return;
  }
  var axes = d.axes || {};
  var imp = d.improving_count || 0, reg = d.regressing_count || 0;
  var hClass = (imp && !reg) ? "good" : (reg && !imp ? "bad" : "");
  // Pluralise so counts above one read "2 axes", not "2 axis".
  function axisWord(n) { return n === 1 ? "axis" : "axes"; }
  var headline;
  if (imp && !reg) headline = "Trending more trustworthy: " + imp + " " + axisWord(imp) + " improving, none regressing on this repo.";
  else if (reg && !imp) headline = "Trust regressing: " + reg + " " + axisWord(reg) + " regressing. Review recent runs.";
  else if (imp || reg) headline = "Mixed: " + imp + " improving, " + reg + " regressing.";
  else headline = "Stable: no significant change across axes yet.";

  var cards = '<div class="cards">' +
    '<div class="card"><div class="label">Runs analyzed</div>' +
    '<div class="val">' + esc(d.runs_count || 0) + '</div>' +
    '<div class="note">from .loki/proofs/</div></div>' +
    '<div class="card"><div class="label">Improving axes</div>' +
    '<div class="val" style="color:var(--green);">' + esc(imp) + '</div>' +
    '<div class="note">good direction</div></div>' +
    '<div class="card"><div class="label">Regressing axes</div>' +
    '<div class="val" style="color:' + (reg ? 'var(--red)' : 'var(--muted)') + ';">' + esc(reg) + '</div>' +
    '<div class="note">needs attention</div></div>' +
    '</div>';

  // Fixed display order; axes absent from the payload are skipped.
  var axesHtml = '<h2>Earned-autonomy signals</h2><div class="axes">';
  var order = ["council_pass_rate", "gate_pass_rate", "iterations", "interventions"];
  for (var k = 0; k < order.length; k++) {
    if (axes[order[k]]) axesHtml += renderAxis(order[k], axes[order[k]], series);
  }
  axesHtml += '</div>';

  c.innerHTML = cards +
    '<div class="headline ' + hClass + '" style="margin-top:14px;">' + esc(headline) + '</div>' +
    axesHtml +
    renderRuns(series);
}
260
// Replace #content with an error card; the message is HTML-escaped.
function renderError(msg) {
  var target = document.getElementById("content");
  target.innerHTML =
    '<div class="empty">Could not load trust data. ' + esc(msg || "") + "</div>";
}

// Load the trajectory once on page load. Non-2xx responses are turned into
// an Error so they reach the same error card as network/parse failures.
fetch("/api/trust/trajectory", { headers: { "Accept": "application/json" } })
  .then(function (response) {
    if (!response.ok) throw new Error("HTTP " + response.status);
    return response.json();
  })
  .then(function (payload) {
    render(payload || {});
  })
  .catch(function (err) {
    renderError(err && err.message);
  });
268
+ })();
269
+ </script>
270
+ </body>
271
+ </html>
@@ -2,7 +2,7 @@
2
2
 
3
3
  The flagship product of [Autonomi](https://www.autonomi.dev/). Complete installation instructions for all platforms and use cases.
4
4
 
5
- **Version:** v7.15.0
5
+ **Version:** v7.16.0
6
6
 
7
7
  ---
8
8
 
@@ -0,0 +1,127 @@
1
+ # R4: Visible Trust Trajectory - Design Note
2
+
3
+ Status: implemented in worktree (not yet merged). Author: R4 release team.
4
+ Verified against live source on 2026-06-03 (v7.8.3 worktree base; R1/R3/R5
5
+ already shipped, so the arc is further along than the loki-plan doc states).
6
+
7
+ ## The story no competitor tells
8
+
9
+ Devin, Cursor, Windsurf, Claude Code, Aider et al. show you a single run.
10
+ None show you whether the agent is getting more trustworthy on YOUR repo over
11
+ time. Loki already runs a 3-reviewer council + RARV-C closure on every run and
12
+ persists the result. R4 makes the resulting TRUST TRAJECTORY visible:
13
+
14
+ - council approve-rate trending UP
15
+ - gate pass-rate trending UP
16
+ - iterations-to-completion trending DOWN
17
+ - human interventions trending DOWN
18
+
19
+ If the agent is earning autonomy on this repo, the trajectory shows it. That is
20
+ compounding, repo-specific proof of trust -> stickiness.
21
+
22
+ ## Honest-data rule (non-negotiable)
23
+
24
+ Every number derives from REAL persisted run records. Never fabricate a trend.
25
+ With fewer than 2 runs, the trajectory is reported as "not enough history yet"
26
+ (insufficient=true), never a fake direction.
27
+
28
+ ## Data source (REUSED, not new)
29
+
30
+ R3 already established `.loki/proofs/<run_id>/proof.json` as the persistent,
31
+ one-per-run history record (written by `autonomy/lib/proof-generator.py` at run
32
+ completion, on both success and failure, unless `LOKI_PROOF=0`). The R3 cost
33
+ timeline endpoint (`dashboard/server.py` `/api/cost/timeline`) already mines
34
+ this exact directory for per-run cost history.
35
+
36
+ R4 mines the SAME directory for the trust signals already present in each
37
+ proof.json:
38
+
39
+ | Trust signal | proof.json path | Notes |
40
+ |-------------------------|------------------------------------------|-------|
41
+ | council pass (per run) | `council.final_verdict` | APPROVE/APPROVED/COMPLETE => pass |
42
+ | council ratio (per run) | `council.reviewers[].vote` (APPROVE/...) | secondary signal when verdict absent |
43
+ | gate pass-rate (per run)| `quality_gates.passed` / `.total` | already aggregated by generator |
44
+ | iterations (per run) | `iterations` (int or {count}) | iterations-to-completion |
45
+ | files changed (per run) | `files_changed.count` | context, not a trust axis |
46
+ | timestamp | `generated_at` (ISO 8601) | ordering axis |
47
+
48
+ Human interventions: there is no per-run intervention counter persisted in
49
+ proof.json today. Rather than fabricate one or add new instrumentation in this
50
+ slice, R4 reports interventions as a derived best-effort signal ONLY when the
51
+ proof carries it (`council.interventions` or top-level `interventions`), and
52
+ otherwise marks that axis `available=false` with an honest note. This keeps the
53
+ honest-data rule intact and leaves a clean seam for a future per-run
54
+ intervention counter (a one-line add in proof-generator.py).
55
+
56
+ ## Direction calculation (up / down / flat)
57
+
58
+ For each numeric axis across the time-ordered run series:
59
+
60
+ 1. Split the series into an earlier half and a later half (median split; odd
61
+ counts drop the middle point so the two halves never overlap).
62
+ 2. Compare the mean of the later half vs the earlier half.
63
+ 3. delta = later_mean - earlier_mean. Direction:
64
+ - `flat` if |delta| <= epsilon (epsilon scaled per axis; rates use 0.01).
65
+ - `up` / `down` by sign of delta.
66
+ 4. "Good direction" is axis-specific: higher is better for council/gate pass
67
+ rates; lower is better for iterations + interventions. The `improving`
68
+ boolean encodes whether the direction is the good one, so the UI can color
69
+ green/red without re-encoding the per-axis polarity.
70
+
71
+ Rationale for half-split vs least-squares slope: half-split is robust to a
72
+ single noisy run, needs no float regression in bash, and is trivially testable
73
+ with fixtures. A 2-run series degrades to last-vs-first, which is correct.
74
+
75
+ ## Persistence (under .loki/metrics/, REUSED dir)
76
+
77
+ The aggregated trajectory is persisted to
78
+ `.loki/metrics/trust-trajectory.json` (schema_version 1). This is a derived
79
+ cache, written by the `loki trust` command and the dashboard endpoint so other
80
+ surfaces can read a single source of truth. It is NOT authoritative state: it
81
+ is always recomputable from `.loki/proofs/`. Deleting it loses nothing.
82
+
83
+ ## Surfaces
84
+
85
+ 1. CLI: `loki trust [--json]` (NEW Bun-native command, mirrors `loki kpis`
86
+ exactly). Falls through to a bash `cmd_trust` when bun is absent (kpis had
87
+ no bash fallback; R4 adds one because the Python derivation is shared and
88
+ trivial to call from bash, giving real bash+Bun parity).
89
+ - `loki kpis` stays a single-run snapshot. R4 does NOT duplicate it; `trust`
90
+ is the across-runs trajectory view. `loki kpis` output gains a one-line
91
+ pointer to `loki trust` (no behavior change).
92
+
93
+ 2. Dashboard endpoint: `GET /api/trust/trajectory` (NEW, mirrors
94
+ `/api/cost/timeline`). Reads `.loki/proofs/*/proof.json`, returns the
95
+ per-run series + per-axis direction + insufficient flag.
96
+
97
+ 3. Dashboard panel: standalone `dashboard/static/trust.html` + `/trust` route
98
+ (mirrors `cost.html` + `/cost`), plus a nav entry and SPA section in
99
+ `build-standalone.js` (mirrors the cost panel wiring exactly).
100
+
101
+ 4. WS push: the `_push_loki_state_loop` broadcasts a `trust_update` message
102
+ when the trajectory's runs/improving/regressing counts change (mirrors the R3
103
+ `budget_status` transition push). No new channel; reuses manager.broadcast.
104
+
105
+ ## Parity + no-duplication audit
106
+
107
+ - Data: reuses `.loki/proofs/` (R1/R3). No new run-time instrumentation.
108
+ - Endpoint: new route, but copies the `/api/cost/timeline` read pattern and
109
+ the `_proofs_dir()` / `_safe_json_read` helpers in spirit.
110
+ - Panel: new `trust.html`, structurally a sibling of `cost.html`.
111
+ - CLI: new `trust`, structurally a sibling of `kpis`. `kpis` unchanged except a
112
+ one-line see-also.
113
+ - Shared derivation: a single Python module
114
+ (`autonomy/lib/trust_trajectory.py`) is the source of truth; the dashboard
115
+ endpoint imports it, and the bash `cmd_trust` shells out to it. The Bun
116
+ command reimplements the same pure logic in TS (parity-tested), matching how
117
+ `kpis` has both a TS derivation and reads the same JSON the bash side writes.
118
+
119
+ ## Test plan
120
+
121
+ - Python: `tests/test_trust_trajectory.py` - aggregation from fixture
122
+ proof.json files, direction calc (up/down/flat) per axis polarity, the
123
+ insufficient-history (<2 runs) case, no-PII (only derived numbers + run_id +
124
+ timestamps leave the function), malformed proof.json skipped not fatal.
125
+ - TS: `loki-ts/tests/metrics/trust.test.ts` - same aggregation + direction
126
+ parity on identical fixtures, insufficient case, JSON/human formatting.
127
+ - All mocked from on-disk fixtures. No provider calls, no paid calls.