loki-mode 7.26.0 → 7.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/autonomy/loki CHANGED
@@ -554,12 +554,14 @@ show_help() {
554
554
  echo " projects Multi-project registry management"
555
555
  echo " audit [cmd] Agent audit log and quality scanning (log|scan)"
556
556
  echo " heal <path> Legacy system healing (archaeology, stabilize, modernize)"
557
+ echo " verify [base] Deterministic PR verification (Autonomi Verify MVP; CI-gate exit codes)"
557
558
  echo " review [opts] Standalone code review with quality gates (diff, staged, PR, files)"
558
559
  echo " optimize Optimize prompts based on session history"
559
560
  echo " enterprise Enterprise feature management (tokens, OIDC)"
560
561
  echo " metrics [opts] Session productivity report (--json, --last N, --save, --share)"
561
562
  echo " cost [opts] Transparent cost view: per-run/project spend + budget (--json, --last N)"
562
563
  echo " trust [--json] Visible trust trajectory: council/gate pass-rate + interventions over runs [R4]"
564
+ echo " trust-metrics Trust-layer metrics: evidence-block rate, gate distribution, council split, cost/verified (--json)"
563
565
  echo " dogfood Show self-development statistics"
564
566
  echo " secrets [cmd] API key status and validation (status|validate)"
565
567
  echo " reset [target] Reset session state (all|retries|failed)"
@@ -11355,6 +11357,29 @@ with open(manifest_path, 'w') as f:
11355
11357
  # Modernize legacy codebases incrementally without breaking existing behavior.
11356
11358
  #===============================================================================
11357
11359
 
11360
+ # ---------------------------------------------------------------------------
11361
+ # loki verify - Autonomi Verify (Verification-as-a-Service MVP)
11362
+ #
11363
+ # Thin dispatcher that sources autonomy/verify.sh and delegates to its
11364
+ # verify_main(). The verification core is deliberately standalone (it does NOT
11365
+ # enter the autonomous iteration loop): it computes a PR-style merge-base diff
11366
+ # and runs deterministic gates against the current tree, emitting a verdict and
11367
+ # a consolidated evidence document. Deterministic-only in this MVP (no LLM
11368
+ # review). Exit code is propagated to the caller so the command is CI-gate
11369
+ # usable.
11370
+ # ---------------------------------------------------------------------------
11371
cmd_verify() {
    # Dispatcher for `loki verify`: loads the standalone verify module and
    # hands all arguments to its verify_main entry point.
    #
    # Arguments: "$@" - forwarded unchanged to verify_main
    # Returns:   verify_main's exit status, or 3 when the module file is
    #            missing or does not provide verify_main
    local verify_mod="$_LOKI_SCRIPT_DIR/verify.sh"
    if [ ! -f "$verify_mod" ]; then
        echo -e "${RED}Error: verify module not found at $verify_mod${NC}" >&2
        return 3
    fi

    # The status of `source` is the status of the module's last command, which
    # says nothing about whether the entry point got defined, so it is not
    # checked here; the declare -F probe below is the real load check.
    # shellcheck source=/dev/null
    source "$verify_mod"

    # Without this guard a broken or truncated module surfaces as
    # "verify_main: command not found" with exit 127, which a CI gate cannot
    # tell apart from a verdict. Report it as the same tool error as a missing
    # module instead.
    if ! declare -F verify_main >/dev/null 2>&1; then
        echo -e "${RED}Error: verify module at $verify_mod does not define verify_main${NC}" >&2
        return 3
    fi

    # Last command: its exit status is the function's exit status.
    verify_main "$@"
}
11382
+
11358
11383
  cmd_heal_help() {
11359
11384
  echo -e "${BOLD}loki heal${NC} - Legacy system healing (v6.67.0)"
11360
11385
  echo ""
@@ -13502,6 +13527,9 @@ main() {
13502
13527
  heal)
13503
13528
  cmd_heal "$@"
13504
13529
  ;;
13530
+ verify)
13531
+ cmd_verify "$@"
13532
+ ;;
13505
13533
  migrate)
13506
13534
  cmd_migrate "$@"
13507
13535
  ;;
@@ -13529,6 +13557,9 @@ main() {
13529
13557
  trust)
13530
13558
  cmd_trust "$@"
13531
13559
  ;;
13560
+ trust-metrics)
13561
+ cmd_trust_metrics "$@"
13562
+ ;;
13532
13563
  syslog)
13533
13564
  cmd_syslog "$@"
13534
13565
  ;;
@@ -18775,6 +18806,68 @@ cmd_trust() {
18775
18806
  python3 "$trust_mod" --loki-dir "$loki_dir" ${pass_args[@]+"${pass_args[@]}"}
18776
18807
  }
18777
18808
 
18809
+ # Trust-layer metrics (benchmark program section 3): the four AVAILABLE-TODAY
18810
+ # metrics nobody else can publish, computed for THIS project from the durable
18811
+ # trust-events.jsonl log plus the .loki/proofs/ corpus. Honest by construction:
18812
+ # each metric reports its own n= and says "not instrumented" rather than a
18813
+ # fabricated zero. Single project only.
18814
cmd_trust_metrics() {
    # `loki trust-metrics`: validates the CLI flags, then runs
    # lib/trust_metrics.py against this project's .loki directory.
    #
    # Arguments: --json | --no-cache (forwarded to the Python module),
    #            --help | -h (print usage and exit 0)
    # Exits:     1 on unknown option, missing python3, or missing module;
    #            2 on --all-projects (explicitly unsupported)
    # Returns:   the Python module's exit status otherwise
    #
    # All error diagnostics go to stderr so that stdout stays clean for
    # --json consumers.
    local pass_args=()
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --help|-h)
                echo -e "${BOLD}loki trust-metrics${NC} - Trust-layer metrics (single project)"
                echo ""
                echo "Usage: loki trust-metrics [options]"
                echo ""
                echo "Computes the four trust-layer metrics from this project's"
                echo ".loki artifacts and emits .loki/metrics/trust-metrics.json"
                echo "plus a human-readable table:"
                echo " 1. Evidence-gate block rate (runs that caught an unproven"
                echo " 'done' claim before honoring completion)"
                echo " 2. Gate failure distribution per run (median, p90, per-gate)"
                echo " 3. Council rejection / split-verdict rate"
                echo " 4. Cost-per-VERIFIED-task (local verified denominator)"
                echo ""
                echo "Sources: .loki/metrics/trust-events.jsonl (durable event log)"
                echo "and .loki/proofs/<id>/proof.json. A metric with no source"
                echo "artifact is reported 'not instrumented', never a fake 0."
                echo ""
                echo "Options:"
                echo " --json Machine-readable JSON output"
                echo " --no-cache Do not write trust-metrics.json"
                echo " --help, -h Show this help"
                echo ""
                echo "Scope: SINGLE PROJECT only. An --all-projects registry"
                echo "aggregator is out of scope; run this inside each project."
                exit 0
                ;;
            --json)
                pass_args+=("--json")
                shift
                ;;
            --no-cache)
                pass_args+=("--no-cache")
                shift
                ;;
            --all-projects)
                echo -e "${RED}--all-projects is out of scope (single project only).${NC}" >&2
                echo "Run 'loki trust-metrics' inside each project directory." >&2
                exit 2
                ;;
            *)
                echo -e "${RED}Unknown option: $1${NC}" >&2
                echo "Run 'loki trust-metrics --help' for usage." >&2
                exit 1
                ;;
        esac
    done

    if ! command -v python3 &>/dev/null; then
        echo -e "${RED}python3 is required for trust metrics${NC}" >&2
        exit 1
    fi

    local tm_mod="$_LOKI_SCRIPT_DIR/lib/trust_metrics.py"
    if [ ! -f "$tm_mod" ]; then
        echo -e "${RED}trust_metrics.py not found at $tm_mod${NC}" >&2
        exit 1
    fi

    local loki_dir="${LOKI_DIR:-.loki}"
    # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array without
    # tripping `set -u` on older bash versions.
    python3 "$tm_mod" --loki-dir "$loki_dir" ${pass_args[@]+"${pass_args[@]}"}
}
18870
+
18778
18871
  # Transparent cost view (R3): per-run + per-project spend, model routing, and
18779
18872
  # budget status with the 80% warn line. Reuses efficiency_cost.collect_efficiency
18780
18873
  # for the current-run aggregate (single source of truth) and reads .loki/proofs/
package/autonomy/run.sh CHANGED
@@ -1226,6 +1226,94 @@ emit_event_json() {
1226
1226
  log_debug "Event: $event_type - $json_data"
1227
1227
  }
1228
1228
 
1229
+ # Trust-layer metrics event writer (benchmark program section 3). Appends one
1230
+ # durable record per trust event to .loki/metrics/trust-events.jsonl via the
1231
+ # Python writer (single source of truth for the JSONL schema). This is ADDITIVE
1232
+ # and purely a side effect: it writes nothing to stdout, ignores all errors, and
1233
+ # never alters control flow or any caller's return value. The single-state
1234
+ # control files (evidence-block.json, gate-failure-count.json) are untouched;
1235
+ # this log exists because those files are erased on the successful-run path,
1236
+ # losing exactly the self-correction events the trust metrics publish.
1237
+ # Resolve a stable, UNIQUE-PER-RUN id for the trust event log. The cross-run
1238
+ # denominators (block rate, gate distribution) require ids that are distinct per
1239
+ # run. A persisted per-run file is the source of truth, NOT LOKI_SESSION_ID:
1240
+ # - On `loki start ./prd.md`, LOKI_SESSION_ID is unset entirely.
1241
+ # - On `loki run <issue>`, LOKI_SESSION_ID is the issue NUMBER, which is stable
1242
+ # across re-runs by design (so `loki stop <n>` works); using it would merge
1243
+ # every re-run of the same issue into one bucket and skew the rates.
1244
+ # So a fresh run always MINTS a new unique id into .loki/state/trust-run-id, and
1245
+ # every later event in that run reads it back. LOKI_SESSION_ID is only a
1246
+ # last-resort fallback when no minted file exists (e.g. an event fired before
1247
+ # any run_start, which the aggregator then treats as un-instrumented anyway).
1248
+ # Events never join to proof.json (Metrics 1-3 are events-only, Metric 4 is
1249
+ # proofs-only), so intra-log uniqueness is the only requirement.
1250
+ # Usage: _loki_trust_run_id [--new]
1251
_loki_trust_run_id() {
    # Resolve the id used to tag trust events for the current run.
    #
    # Usage:   _loki_trust_run_id [--new]
    # Globals: LOKI_DIR, TARGET_DIR, LOKI_SESSION_ID (read)
    # Outputs: the id on stdout, without a trailing newline; empty when
    #          nothing can be resolved
    # Returns: 0 always (best-effort; never fails the caller)
    local loki_dir="${LOKI_DIR:-${TARGET_DIR:-.}/.loki}"
    local id_file="$loki_dir/state/trust-run-id"

    if [ "${1:-}" = "--new" ]; then
        # Fresh run: mint a new unique id (UTC timestamp + shell pid + short
        # random) and persist it as the source of truth for this run's events.
        local new_id
        new_id="run-$(date -u +%Y%m%d%H%M%S)-$$-${RANDOM:-0}"
        mkdir -p "$loki_dir/state" 2>/dev/null || true
        # The group is required: a failing `> file` is reported by the shell
        # before a trailing 2>/dev/null on the same command takes effect, so
        # an unwritable state dir would otherwise print an error.
        { printf '%s' "$new_id" > "$id_file"; } 2>/dev/null || true
        printf '%s' "$new_id"
        return 0
    fi

    # Read path: the minted per-run file wins over LOKI_SESSION_ID so a resume
    # in a separate process still resolves to the same run.
    local stored_id=""
    if [ -s "$id_file" ]; then
        stored_id="$(cat "$id_file" 2>/dev/null)" || stored_id=""
    fi
    if [ -n "$stored_id" ]; then
        printf '%s' "$stored_id"
        return 0
    fi

    # No usable persisted id (absent, empty, or unreadable): fall back to the
    # session id, or print nothing so the writer can record its own default.
    printf '%s' "${LOKI_SESSION_ID:-}"
}
1278
+
1279
+ # Usage: record_trust_event_bash <event_type> [key=value ...]
1280
+ # Pass LOKI_TRUST_RUN_ID in the environment to override the resolved id (the
1281
+ # run_start site sets it to the freshly minted id so the first event matches).
1282
record_trust_event_bash() {
    # Best-effort trust-event writer: hands one event to record_trust_event()
    # in lib/trust_metrics.py, loaded by path through an inline Python script.
    #
    # Usage:   record_trust_event_bash <event_type> [key=value ...]
    # Globals: SCRIPT_DIR, LOKI_DIR, TARGET_DIR, LOKI_TRUST_RUN_ID,
    #          ITERATION_COUNT (read)
    # Outputs: nothing (Python stdout/stderr are discarded)
    # Returns: 0 always; a missing module, missing python3, or a Python error
    #          is a silent no-op
    local event_type="${1:-}"
    # ${1:-} instead of $1 so a zero-argument call cannot abort a caller
    # running under `set -u`; an event without a type is skipped.
    [ -n "$event_type" ] || return 0
    shift

    local tm_mod="${SCRIPT_DIR:-}/lib/trust_metrics.py"
    [ -f "$tm_mod" ] || return 0
    command -v python3 >/dev/null 2>&1 || return 0

    local loki_dir="${LOKI_DIR:-${TARGET_DIR:-.}/.loki}"
    # An explicit LOKI_TRUST_RUN_ID wins; otherwise resolve the id.
    local run_id="${LOKI_TRUST_RUN_ID:-}"
    if [ -z "$run_id" ]; then
        run_id="$(_loki_trust_run_id 2>/dev/null)" || run_id=""
    fi

    # Pass kv pairs as argv so Python parses them (no shell JSON building).
    # The heredoc is quoted, so nothing in the Python text is shell-expanded;
    # all inputs travel through the _TM_* environment variables and argv.
    _TM_LOKI_DIR="$loki_dir" \
    _TM_MOD_PATH="$tm_mod" \
    _TM_EVENT_TYPE="$event_type" \
    _TM_RUN_ID="$run_id" \
    _TM_ITERATION="${ITERATION_COUNT:-0}" \
    python3 - "$@" <<'TRUST_EVENT_PY' >/dev/null 2>&1 || true
import os, sys, importlib.util
spec = importlib.util.spec_from_file_location("trust_metrics", os.environ["_TM_MOD_PATH"])
tm = importlib.util.module_from_spec(spec)
spec.loader.exec_module(tm)
# run_id and iteration are passed explicitly below. The same key arriving via
# **fields would raise TypeError (duplicate keyword argument) and, with all
# errors swallowed, lose the whole event -- so those keys are ignored here.
reserved = ("run_id", "iteration")
fields = {}
for arg in sys.argv[1:]:
    key, sep, value = arg.partition("=")
    if sep and key not in reserved:
        fields[key] = value
tm.record_trust_event(
    os.environ["_TM_LOKI_DIR"],
    os.environ["_TM_EVENT_TYPE"],
    run_id=os.environ.get("_TM_RUN_ID", "") or None,
    iteration=os.environ.get("_TM_ITERATION", "0"),
    **fields,
)
TRUST_EVENT_PY
}
1316
+
1229
1317
  # v7.0.2: Bash helper to emit a managed-agents event to the dashboard's
1230
1318
  # managed event log (.loki/managed/events.ndjson). Mirrors the Python
1231
1319
  # emit_managed_event helper so bash callers can land events in the same
@@ -2916,7 +3004,7 @@ spawn_worktree_session() {
2916
3004
  >> "$log_file" 2>&1 || _wt_exit=$?
2917
3005
  ;;
2918
3006
  codex)
2919
- codex exec --full-auto \
3007
+ codex exec --full-auto --skip-git-repo-check \
2920
3008
  "Loki Mode: $task_prompt. Read .loki/CONTINUITY.md for context." \
2921
3009
  >> "$log_file" 2>&1 || _wt_exit=$?
2922
3010
  ;;
@@ -3117,7 +3205,7 @@ Output ONLY the resolved file content with no conflict markers. No explanations.
3117
3205
  resolution=$(claude --dangerously-skip-permissions -p "$conflict_prompt" --output-format text 2>/dev/null)
3118
3206
  ;;
3119
3207
  codex)
3120
- resolution=$(codex exec --full-auto "$conflict_prompt" 2>/dev/null)
3208
+ resolution=$(codex exec --full-auto --skip-git-repo-check "$conflict_prompt" 2>/dev/null)
3121
3209
  ;;
3122
3210
  cline)
3123
3211
  resolution=$(invoke_cline_capture "$conflict_prompt" 2>/dev/null)
@@ -6551,6 +6639,13 @@ print(counts[gate_name])
6551
6639
  loki_crash_friction "gate_failure" "gate=${gate_name} consecutive=${count}" >/dev/null 2>&1 || true
6552
6640
  fi
6553
6641
 
6642
+ # Trust-metrics: append a durable per-failure record so the gate-failure
6643
+ # distribution survives clear_gate_failure (which resets the running
6644
+ # counter). CRITICAL: this function's stdout IS its return value, so the
6645
+ # write is fully stdout-suppressed and best-effort; it cannot change the
6646
+ # echoed count or any gate behavior.
6647
+ record_trust_event_bash "gate_failure" "gate=${gate_name}" "consecutive=${count}" >/dev/null 2>&1 || true
6648
+
6554
6649
  echo "$count"
6555
6650
  }
6556
6651
 
@@ -7500,7 +7595,7 @@ BUILD_PROMPT
7500
7595
  --output-format text > "$review_output" 2>/dev/null
7501
7596
  ;;
7502
7597
  codex)
7503
- codex exec --full-auto "$prompt_text" \
7598
+ codex exec --full-auto --skip-git-repo-check "$prompt_text" \
7504
7599
  > "$review_output" 2>/dev/null
7505
7600
  ;;
7506
7601
  cline)
@@ -7715,7 +7810,7 @@ ADVERSARIAL_EOF
7715
7810
  ;;
7716
7811
  codex)
7717
7812
  if command -v codex &>/dev/null; then
7718
- codex exec --full-auto "$adversarial_prompt" \
7813
+ codex exec --full-auto --skip-git-repo-check "$adversarial_prompt" \
7719
7814
  > "$result_file" 2>/dev/null || true
7720
7815
  fi
7721
7816
  ;;
@@ -11900,6 +11995,19 @@ run_autonomous() {
11900
11995
  _LOKI_RUN_START_SHA="$(cat "$_start_sha_file" 2>/dev/null || echo "")"
11901
11996
  export _LOKI_RUN_START_SHA
11902
11997
 
11998
+ # Trust-metrics instrumentation marker: record one run_start event per
11999
+ # fresh run so the trust-metrics denominator counts ONLY instrumented runs.
12000
+ # This is what lets the aggregator distinguish "0 blocks measured" from
12001
+ # "this run predates instrumentation" (the central honesty rule). Additive,
12002
+ # best-effort, stdout-silent; never affects control flow. Mint a fresh
12003
+ # per-run id here and export it so every later event in this run shares it
12004
+ # (LOKI_SESSION_ID is absent on the `loki start` path).
12005
+ if [ "${ITERATION_COUNT:-0}" -eq 0 ]; then
12006
+ LOKI_TRUST_RUN_ID="$(_loki_trust_run_id --new)"
12007
+ export LOKI_TRUST_RUN_ID
12008
+ record_trust_event_bash "run_start" "start_sha=${_LOKI_RUN_START_SHA:-}" 2>/dev/null || true
12009
+ fi
12010
+
11903
12011
  # Notify dashboard of active project directory (for AI Chat cross-directory usage)
11904
12012
  if command -v curl &>/dev/null; then
11905
12013
  local project_cwd
@@ -12586,7 +12694,7 @@ if __name__ == "__main__":
12586
12694
  # Uses dynamic tier from RARV phase (tier_param already set above)
12587
12695
  { LOKI_CODEX_REASONING_EFFORT="$tier_param" \
12588
12696
  CODEX_MODEL_REASONING_EFFORT="$tier_param" \
12589
- codex exec --full-auto \
12697
+ codex exec --full-auto --skip-git-repo-check \
12590
12698
  "$prompt" 2>&1 | tee -a "$log_file" "$agent_log" "$iter_output"; \
12591
12699
  } && exit_code=0 || exit_code=$?
12592
12700
  ;;