loki-mode 7.14.0 → 7.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/SKILL.md CHANGED
@@ -3,7 +3,7 @@ name: loki-mode
3
3
  description: Autonomous spec-to-product system. Triggers on "Loki Mode". Takes a spec (PRD, GitHub issue, OpenAPI doc, etc.) to deployed product via the RARV-C closure loop, with minimal human intervention. Provider-agnostic. Requires --dangerously-skip-permissions flag.
4
4
  ---
5
5
 
6
- # Loki Mode v7.14.0
6
+ # Loki Mode v7.16.0
7
7
 
8
8
  **You are an autonomous agent. You make decisions. You do not ask questions. You do not stop.**
9
9
 
@@ -383,4 +383,4 @@ See `CHANGELOG.md` entries [7.5.7], [7.5.8], [7.5.13] for the per-fix list and r
383
383
 
384
384
  ---
385
385
 
386
- **v7.14.0 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
386
+ **v7.16.0 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
package/VERSION CHANGED
@@ -1 +1 @@
1
- 7.14.0
1
+ 7.16.0
@@ -0,0 +1,437 @@
1
+ #!/usr/bin/env python3
2
+ """R4 trust trajectory - derive a per-project trust trend from proof-of-run history.
3
+
4
+ The story no competitor tells: show whether the agent is EARNING autonomy on
5
+ THIS repo over time. We derive the trajectory from the persistent per-run
6
+ records that R1/R3 already write to .loki/proofs/<run_id>/proof.json. No new
7
+ run-time instrumentation; this is a pure read-and-aggregate layer.
8
+
9
+ Axes (each derived from fields already present in proof.json):
10
+ council_pass_rate higher is better (council.final_verdict approve => 1.0)
11
+ gate_pass_rate higher is better (quality_gates.passed / .total)
12
+ iterations lower is better (iterations.count)
13
+ interventions lower is better (best-effort; available only when a
14
+ proof carries it; never fabricated)
15
+
16
+ Honest-data rule: with fewer than 2 runs the trajectory is "insufficient"
17
+ (insufficient=True) and no direction is invented. Numbers are only ever derived
18
+ from real proof.json values; a missing axis is reported available=False, not 0.
19
+
20
+ Direction (up/down/flat) is computed by a median half-split: mean of the later
21
+ half minus mean of the earlier half. Robust to one noisy run, no float
22
+ regression needed, and a 2-run series degrades to last-vs-first.
23
+
24
+ Public API:
25
+ compute_trajectory(loki_dir) -> dict (schema_version 1)
26
+ format_trajectory_human(traj) -> str
27
+ format_trajectory_json(traj) -> str
28
+ write_trajectory_cache(loki_dir, traj) -> str | None
29
+ main(argv) -> int (CLI entry: prints human or --json)
30
+
31
+ No external deps. Python 3.8+ (matches the rest of autonomy/lib).
32
+ """
33
+
34
+ import json
35
+ import os
36
+ import sys
37
+ from datetime import datetime, timezone
38
+
39
+ SCHEMA_VERSION = 1
40
+
41
+ # Per-axis "good direction" polarity. True => higher is better.
42
+ _AXIS_HIGHER_IS_BETTER = {
43
+ "council_pass_rate": True,
44
+ "gate_pass_rate": True,
45
+ "iterations": False,
46
+ "interventions": False,
47
+ }
48
+
49
+ # Per-axis flat epsilon. Rates live in [0,1]; counts use a larger band.
50
+ _AXIS_EPSILON = {
51
+ "council_pass_rate": 0.01,
52
+ "gate_pass_rate": 0.01,
53
+ "iterations": 0.25,
54
+ "interventions": 0.25,
55
+ }
56
+
57
+ _AXIS_LABELS = {
58
+ "council_pass_rate": "Council pass rate",
59
+ "gate_pass_rate": "Gate pass rate",
60
+ "iterations": "Iterations to completion",
61
+ "interventions": "Human interventions",
62
+ }
63
+
64
+ # Verdict tokens that count as a council pass.
65
+ _PASS_TOKENS = ("APPROVE", "APPROVED", "COMPLETE", "PASS", "PASSED")
66
+
67
+
68
+ def _read_json(path, default=None):
69
+ try:
70
+ with open(path, "r", encoding="utf-8") as fh:
71
+ return json.load(fh)
72
+ except Exception:
73
+ return default
74
+
75
+
76
+ def _obj(v):
77
+ return v if isinstance(v, dict) else {}
78
+
79
+
80
def _verdict_is_pass(verdict):
    """Classify a council ``final_verdict`` value.

    The verdict is stringified, stripped and upper-cased. Returns None when
    the result is empty (no signal), True when it starts with one of
    ``_PASS_TOKENS``, and False for any other non-empty verdict.
    """
    normalized = str(verdict or "").strip().upper()
    if normalized == "":
        return None
    # str.startswith accepts a tuple of prefixes and yields a plain bool.
    return normalized.startswith(_PASS_TOKENS)
89
+
90
+
91
def _council_pass_value(council):
    """Reduce one run's council record to 1.0 (pass), 0.0 (fail) or None.

    The explicit ``final_verdict`` wins when it carries a signal. Otherwise
    the ``reviewers`` list is rolled up: the run passes only when every
    dict-shaped reviewer entry has a ``vote`` starting with a pass token.
    None is returned when neither source yields a signal.
    """
    council = _obj(council)

    # Primary signal: the explicit final verdict.
    verdict = _verdict_is_pass(council.get("final_verdict"))
    if verdict is not None:
        return 1.0 if verdict else 0.0

    # Secondary signal: unanimous reviewer approval.
    reviewers = council.get("reviewers")
    if not (isinstance(reviewers, list) and reviewers):
        return None
    votes = [
        str(entry.get("vote") or "").strip().upper()
        for entry in reviewers
        if isinstance(entry, dict)  # non-dict entries are not counted
    ]
    if not votes:
        return None
    unanimous = all(vote.startswith(_PASS_TOKENS) for vote in votes)
    return 1.0 if unanimous else 0.0
113
+
114
+
115
+ def _gate_rate_value(quality_gates):
116
+ """Per-run gate pass-rate in [0,1], or None when no gates recorded."""
117
+ qg = _obj(quality_gates)
118
+ total = qg.get("total")
119
+ passed = qg.get("passed")
120
+ try:
121
+ total = int(total)
122
+ passed = int(passed)
123
+ except (TypeError, ValueError):
124
+ return None
125
+ if total <= 0:
126
+ return None
127
+ return max(0.0, min(1.0, passed / total))
128
+
129
+
130
+ def _iterations_value(iterations):
131
+ """Per-run iteration count, or None when not recorded."""
132
+ if isinstance(iterations, dict):
133
+ c = iterations.get("count")
134
+ else:
135
+ c = iterations
136
+ try:
137
+ c = int(c)
138
+ except (TypeError, ValueError):
139
+ return None
140
+ if c < 0:
141
+ return None
142
+ return float(c)
143
+
144
+
145
+ def _interventions_value(proof):
146
+ """Per-run human-intervention count, ONLY when the proof carries it.
147
+
148
+ There is no per-run intervention counter persisted today; we read it
149
+ opportunistically so the axis lights up the moment that field exists, but
150
+ we never fabricate a value. Returns None when absent.
151
+ """
152
+ council = _obj(proof.get("council"))
153
+ for src in (council.get("interventions"), proof.get("interventions")):
154
+ try:
155
+ n = int(src)
156
+ except (TypeError, ValueError):
157
+ continue
158
+ if n >= 0:
159
+ return float(n)
160
+ return None
161
+
162
+
163
+ def _load_runs(loki_dir):
164
+ """Read every .loki/proofs/<id>/proof.json into a time-ordered run list."""
165
+ proofs_dir = os.path.join(loki_dir, "proofs")
166
+ runs = []
167
+ try:
168
+ entries = sorted(os.listdir(proofs_dir))
169
+ except (OSError, FileNotFoundError):
170
+ return runs
171
+ for name in entries:
172
+ d = os.path.join(proofs_dir, name)
173
+ if not os.path.isdir(d):
174
+ continue
175
+ proof = _read_json(os.path.join(d, "proof.json"), default=None)
176
+ if not isinstance(proof, dict):
177
+ # Malformed / partial proof: skip, do not fail the trajectory.
178
+ continue
179
+ runs.append({
180
+ "run_id": str(proof.get("run_id") or name),
181
+ "generated_at": proof.get("generated_at"),
182
+ "council_pass_rate": _council_pass_value(proof.get("council")),
183
+ "gate_pass_rate": _gate_rate_value(proof.get("quality_gates")),
184
+ "iterations": _iterations_value(proof.get("iterations")),
185
+ "interventions": _interventions_value(proof),
186
+ })
187
+ # Order by generated_at ascending; runs without a timestamp sort last but
188
+ # keep stable directory order among themselves.
189
+ runs.sort(key=lambda r: (r.get("generated_at") is None, r.get("generated_at") or ""))
190
+ return runs
191
+
192
+
193
+ def _mean(values):
194
+ return sum(values) / len(values) if values else None
195
+
196
+
197
def _direction_for_axis(axis, ordered_values):
    """Summarize the trend of one axis over its time-ordered values.

    None entries are ignored. The result depends on how many points remain:

    * 0 points: ``available=False`` plus a note; no trend fields.
    * 1 point: ``available=True``, ``insufficient=True``, direction "flat",
      ``improving=None``, delta 0.0, and a note asking for more history.
    * 2+ points: the series is split in half (an odd count drops the middle
      point so the halves never overlap); ``delta`` is the mean of the later
      half minus the mean of the earlier half. A delta within the axis
      epsilon is "flat" (``improving=None``); otherwise the direction is
      "up"/"down" and ``improving`` tells whether that matches the axis
      polarity.
    """
    higher_is_better = _AXIS_HIGHER_IS_BETTER[axis]
    eps = _AXIS_EPSILON[axis]
    points = [value for value in ordered_values if value is not None]
    count = len(points)

    if not points:
        return {
            "axis": axis,
            "label": _AXIS_LABELS[axis],
            "available": False,
            "higher_is_better": higher_is_better,
            "note": "no runs recorded this metric",
        }

    insufficient = count < 2
    if insufficient:
        # A single point cannot show a trend; report it without a direction.
        earlier_mean, later_mean, delta = points[0], points[-1], 0.0
        direction, improving = "flat", None
    else:
        half = count // 2
        earlier_mean = _mean(points[:half])
        later_mean = _mean(points[count - half:])
        delta = later_mean - earlier_mean
        if abs(delta) <= eps:
            direction, improving = "flat", None
        else:
            direction = "up" if delta > 0 else "down"
            improving = (direction == "up") == higher_is_better

    summary = {
        "axis": axis,
        "label": _AXIS_LABELS[axis],
        "available": True,
        "data_points": count,
        "latest": round(points[-1], 4),
        "higher_is_better": higher_is_better,
        "direction": direction,
        "improving": improving,
        "delta": round(delta, 4),
        "earlier_mean": round(earlier_mean, 4),
        "later_mean": round(later_mean, 4),
        "insufficient": insufficient,
    }
    if insufficient:
        summary["note"] = "not enough history yet (need 2+ runs with this metric)"
    return summary
263
+
264
+
265
def compute_trajectory(loki_dir):
    """Build the R4 trust-trajectory snapshot for a project's .loki dir.

    Loads the recorded runs, derives a per-axis trend for each of the four
    axes, and returns a dict holding the schema version, a UTC generation
    timestamp, the run count, the per-axis summaries, the lists and counts of
    improving / regressing axes, the raw per-run series and explanatory
    notes. ``insufficient`` is True when fewer than two runs were loaded.
    """
    runs = _load_runs(loki_dir)
    axes_order = ("council_pass_rate", "gate_pass_rate", "iterations", "interventions")
    run_fields = ("run_id", "generated_at") + axes_order

    # Per-run values, copied field by field in a fixed key order.
    series = [{field: run[field] for field in run_fields} for run in runs]

    axes = {
        name: _direction_for_axis(name, [run[name] for run in runs])
        for name in axes_order
    }

    def _axes_where_improving_is(flag):
        # Only available axes whose "improving" is exactly this bool qualify.
        return [
            name
            for name in axes_order
            if axes[name].get("available") and axes[name].get("improving") is flag
        ]

    improving_axes = _axes_where_improving_is(True)
    regressing_axes = _axes_where_improving_is(False)

    run_total = len(runs)
    insufficient = run_total < 2

    notes = []
    if insufficient:
        notes.append(
            "not enough history yet: %d run(s) recorded, need 2+ to show a trend"
            % run_total
        )
    if not axes["interventions"].get("available"):
        notes.append(
            "intervention trend unavailable: no per-run intervention count in "
            "proof.json yet (axis lights up automatically once recorded)"
        )

    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    return {
        "schema_version": SCHEMA_VERSION,
        "generated_at": stamp,
        "loki_dir": loki_dir,
        "runs_count": run_total,
        "insufficient": insufficient,
        "axes": axes,
        "improving_count": len(improving_axes),
        "regressing_count": len(regressing_axes),
        "improving_axes": improving_axes,
        "regressing_axes": regressing_axes,
        "series": series,
        "notes": notes,
    }
321
+
322
+
323
def write_trajectory_cache(loki_dir, traj):
    """Persist the trajectory to ``<loki_dir>/metrics/trust-trajectory.json``.

    Best-effort: returns the written path on success and None on any
    failure. The file is only a cache and can always be recomputed from the
    proofs, so a failed write is non-fatal.

    The snapshot is serialized before anything touches the disk, written to
    a temporary sibling file, then moved into place with ``os.replace``. A
    serialization or I/O failure therefore never leaves a truncated cache,
    and an existing cache stays intact until the new one is complete.
    """
    out_dir = os.path.join(loki_dir, "metrics")
    out_path = os.path.join(out_dir, "trust-trajectory.json")
    tmp_path = "%s.%d.tmp" % (out_path, os.getpid())
    try:
        payload = json.dumps(traj, indent=2)
        os.makedirs(out_dir, exist_ok=True)
        with open(tmp_path, "w", encoding="utf-8") as fh:
            fh.write(payload)
        os.replace(tmp_path, out_path)
        return out_path
    except Exception:
        # Deliberately broad: caching must never break the caller.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        return None
338
+
339
+
340
def format_trajectory_json(traj):
    """Render a trajectory snapshot as JSON text indented by two spaces."""
    rendered = json.dumps(traj, indent=2)
    return rendered
342
+
343
+
344
+ def _arrow(direction):
345
+ return {"up": "up", "down": "down", "flat": "flat"}.get(direction, "?")
346
+
347
+
348
+ def _fmt_axis_line(ax):
349
+ label = ax.get("label", ax.get("axis", "?"))
350
+ if not ax.get("available"):
351
+ return " %-26s %s" % (label + ":", "no data")
352
+ direction = ax.get("direction", "flat")
353
+ latest = ax.get("latest")
354
+ higher = ax.get("higher_is_better")
355
+ if ax.get("insufficient"):
356
+ tag = "(need 2+ runs)"
357
+ elif ax.get("improving") is True:
358
+ tag = "improving"
359
+ elif ax.get("improving") is False:
360
+ tag = "regressing"
361
+ else:
362
+ tag = "stable"
363
+ polarity = "higher better" if higher else "lower better"
364
+ return " %-26s %-5s latest=%-7s %-11s [%s]" % (
365
+ label + ":", _arrow(direction), latest, tag, polarity,
366
+ )
367
+
368
+
369
def _append_notes(lines, notes):
    """Append the optional "Notes" section for ``notes`` to ``lines`` in place."""
    if notes:
        lines.append("")
        lines.append("Notes")
        lines.extend(" - %s" % note for note in notes)


def format_trajectory_human(traj):
    """Render a trajectory snapshot as a plain-text report.

    The report starts with a header (snapshot time, source dir, run count).
    When the snapshot is marked ``insufficient`` it explains that more runs
    are needed. Otherwise it lists one line per known axis and an overall
    verdict derived from ``improving_count`` / ``regressing_count``. Any
    ``notes`` are appended as a trailing "Notes" section in both cases.

    The overall line pluralizes its noun ("1 axis" vs "2 axes").
    """
    lines = [
        "Loki Mode Trust Trajectory (snapshot at %s)" % traj.get("generated_at"),
        "Source: %s" % traj.get("loki_dir"),
        "Runs analyzed: %s" % traj.get("runs_count"),
        "",
    ]
    notes = traj.get("notes")

    if traj.get("insufficient"):
        lines.append("Not enough history yet.")
        lines.append("Trust trajectory needs 2+ recorded runs to show a direction.")
        lines.append("Each `loki start` run writes a proof-of-run; come back after the next run.")
        _append_notes(lines, notes)
        return "\n".join(lines)

    # Tolerate a snapshot whose "axes" is missing or null.
    axes = traj.get("axes") or {}
    lines.append("Is the agent earning autonomy on this repo?")
    for axis in ("council_pass_rate", "gate_pass_rate", "iterations", "interventions"):
        if axis in axes:
            lines.append(_fmt_axis_line(axes[axis]))
    lines.append("")

    imp = traj.get("improving_count", 0)
    reg = traj.get("regressing_count", 0)
    if imp and not reg:
        noun = "axis" if imp == 1 else "axes"
        lines.append("Overall: trending more trustworthy (%d %s improving)." % (imp, noun))
    elif reg and not imp:
        noun = "axis" if reg == 1 else "axes"
        lines.append(
            "Overall: trust regressing (%d %s regressing). Review recent runs." % (reg, noun)
        )
    elif imp or reg:
        lines.append("Overall: mixed (%d improving / %d regressing)." % (imp, reg))
    else:
        lines.append("Overall: stable.")

    _append_notes(lines, notes)
    return "\n".join(lines)
407
+
408
+
409
+ def _resolve_loki_dir(argv):
410
+ for i, a in enumerate(argv):
411
+ if a == "--loki-dir" and i + 1 < len(argv):
412
+ return argv[i + 1]
413
+ if a.startswith("--loki-dir="):
414
+ return a.split("=", 1)[1]
415
+ env = os.environ.get("LOKI_DIR")
416
+ if env:
417
+ return env
418
+ return os.path.join(os.getcwd(), ".loki")
419
+
420
+
421
def main(argv=None):
    """CLI entry point: compute the trajectory and print it to stdout.

    ``argv`` defaults to ``sys.argv[1:]``. Flags: ``--json`` selects JSON
    output instead of the human report; ``--no-cache`` skips writing the
    trajectory cache. Always returns 0.
    """
    args = list(sys.argv[1:]) if argv is None else list(argv)
    loki_dir = _resolve_loki_dir(args)
    traj = compute_trajectory(loki_dir)

    if "--no-cache" not in args:
        write_trajectory_cache(loki_dir, traj)

    render = format_trajectory_json if "--json" in args else format_trajectory_human
    sys.stdout.write(render(traj) + "\n")
    return 0
434
+
435
+
436
# Script entry: exit with main()'s return code when executed directly.
if __name__ == "__main__":
    sys.exit(main())