loki-mode 7.26.0 → 7.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/autonomy/run.sh CHANGED
@@ -1226,6 +1226,94 @@ emit_event_json() {
1226
1226
  log_debug "Event: $event_type - $json_data"
1227
1227
  }
1228
1228
 
1229
+ # Trust-layer metrics event writer (benchmark program section 3). Appends one
1230
+ # durable record per trust event to .loki/metrics/trust-events.jsonl via the
1231
+ # Python writer (single source of truth for the JSONL schema). This is ADDITIVE
1232
+ # and purely a side effect: it writes nothing to stdout, ignores all errors, and
1233
+ # never alters control flow or any caller's return value. The single-state
1234
+ # control files (evidence-block.json, gate-failure-count.json) are untouched;
1235
+ # this log exists because those files are erased on the successful-run path,
1236
+ # losing exactly the self-correction events the trust metrics publish.
1237
+ # Resolve a stable, UNIQUE-PER-RUN id for the trust event log. The cross-run
1238
+ # denominators (block rate, gate distribution) require ids that are distinct per
1239
+ # run. A persisted per-run file is the source of truth, NOT LOKI_SESSION_ID:
1240
+ # - On `loki start ./prd.md`, LOKI_SESSION_ID is unset entirely.
1241
+ # - On `loki run <issue>`, LOKI_SESSION_ID is the issue NUMBER, which is stable
1242
+ # across re-runs by design (so `loki stop <n>` works); using it would merge
1243
+ # every re-run of the same issue into one bucket and skew the rates.
1244
+ # So a fresh run always MINTS a new unique id into .loki/state/trust-run-id, and
1245
+ # every later event in that run reads it back. LOKI_SESSION_ID is only a
1246
+ # last-resort fallback when no minted file exists (e.g. an event fired before
1247
+ # any run_start, which the aggregator then treats as un-instrumented anyway).
1248
+ # Events never join to proof.json (Metrics 1-3 are events-only, Metric 4 is
1249
+ # proofs-only), so intra-log uniqueness is the only requirement.
1250
+ # Usage: _loki_trust_run_id [--new]
1251
#######################################
# Resolve (or, with --new, mint) the run id that tags every trust event.
# Globals:
#   LOKI_DIR, TARGET_DIR (read) - locate the .loki directory
#   LOKI_SESSION_ID (read)      - last-resort fallback on the read path
# Arguments:
#   $1 - optional literal "--new": mint a fresh id, persist it to
#        <loki_dir>/state/trust-run-id, and print it
# Outputs:
#   The run id on stdout with no trailing newline; empty when nothing
#   resolves (the caller then lets the writer decide how to label it).
# Returns:
#   Always 0. Persistence is best-effort and must never fail the caller.
#######################################
_loki_trust_run_id() {
  local loki_dir="${LOKI_DIR:-${TARGET_DIR:-.}/.loki}"
  local id_file="$loki_dir/state/trust-run-id"

  if [ "${1:-}" = "--new" ]; then
    # Fresh run: mint a new unique id (UTC timestamp + pid + short random)
    # and persist it as the source of truth for this run's events.
    local new_id
    new_id="run-$(date -u +%Y%m%d%H%M%S)-$$-${RANDOM:-0}"
    mkdir -p "$loki_dir/state" 2>/dev/null || true
    # The brace group matters: with `cmd > file 2>/dev/null` the shell
    # reports a failed open of `file` BEFORE stderr is redirected, so the
    # diagnostic would leak. Redirecting the group's stderr silences it.
    { printf '%s' "$new_id" > "$id_file"; } 2>/dev/null || true
    printf '%s' "$new_id"
    return 0
  fi

  # Read path: the minted per-run file wins over LOKI_SESSION_ID so a resume
  # in a separate process still resolves to the same run, and a stable
  # issue-number session id never collapses re-runs into one bucket.
  if [ -s "$id_file" ]; then
    local persisted_id
    persisted_id="$(cat "$id_file" 2>/dev/null)" || persisted_id=""
    if [ -n "$persisted_id" ]; then
      printf '%s' "$persisted_id"
      return 0
    fi
    # File exists but is unreadable or blank: fall through to the fallback
    # instead of reporting an empty id.
  fi

  # Last resort: the session id if there is one, otherwise the empty string.
  printf '%s' "${LOKI_SESSION_ID:-}"
  return 0
}
1278
+
1279
+ # Usage: record_trust_event_bash <event_type> [key=value ...]
1280
+ # Pass LOKI_TRUST_RUN_ID in the environment to override the resolved id (the
1281
+ # run_start site sets it to the freshly minted id so the first event matches).
1282
#######################################
# Append one trust event via the Python writer in lib/trust_metrics.py.
# Purely a side effect: prints nothing, swallows every error, always
# returns 0, so it is safe inside functions whose stdout is their result.
# Globals:
#   SCRIPT_DIR (read)          - directory containing lib/trust_metrics.py
#   LOKI_DIR, TARGET_DIR (read) - locate the .loki directory
#   LOKI_TRUST_RUN_ID (read)   - overrides the resolved run id when set
#   ITERATION_COUNT (read)     - recorded as the event's iteration (default 0)
# Arguments:
#   $1   - event type; an empty or missing type records nothing
#   $2.. - optional key=value pairs forwarded to the writer as string fields
# Returns:
#   Always 0.
#######################################
record_trust_event_bash() {
  local event_type="${1:-}"
  shift || true
  [ -n "$event_type" ] || return 0

  local tm_mod="${SCRIPT_DIR:-}/lib/trust_metrics.py"
  [ -f "$tm_mod" ] || return 0
  command -v python3 >/dev/null 2>&1 || return 0

  local loki_dir="${LOKI_DIR:-${TARGET_DIR:-.}/.loki}"
  local run_id="${LOKI_TRUST_RUN_ID:-}"
  if [ -z "$run_id" ]; then
    run_id="$(_loki_trust_run_id 2>/dev/null)" || run_id=""
  fi

  # Pass kv pairs as argv so Python parses them (no shell JSON building).
  # Values stay strings; any coercion is left to the writer.
  _TM_LOKI_DIR="$loki_dir" \
  _TM_MOD_PATH="$tm_mod" \
  _TM_EVENT_TYPE="$event_type" \
  _TM_RUN_ID="$run_id" \
  _TM_ITERATION="${ITERATION_COUNT:-0}" \
    python3 - "$@" <<'TRUST_EVENT_PY' >/dev/null 2>&1 || true
import importlib.util
import os
import sys

spec = importlib.util.spec_from_file_location("trust_metrics", os.environ["_TM_MOD_PATH"])
tm = importlib.util.module_from_spec(spec)
spec.loader.exec_module(tm)

# run_id and iteration are passed explicitly below. Letting a key=value pair
# reuse either name would raise "got multiple values for keyword argument"
# and silently drop the whole event, so those keys are ignored here.
RESERVED = ("run_id", "iteration")

fields = {}
for arg in sys.argv[1:]:
    key, sep, value = arg.partition("=")
    if sep and key and key not in RESERVED:
        fields[key] = value

tm.record_trust_event(
    os.environ["_TM_LOKI_DIR"],
    os.environ["_TM_EVENT_TYPE"],
    run_id=os.environ.get("_TM_RUN_ID", "") or None,
    iteration=os.environ.get("_TM_ITERATION", "0"),
    **fields,
)
TRUST_EVENT_PY
  return 0
}
1316
+
1229
1317
  # v7.0.2: Bash helper to emit a managed-agents event to the dashboard's
1230
1318
  # managed event log (.loki/managed/events.ndjson). Mirrors the Python
1231
1319
  # emit_managed_event helper so bash callers can land events in the same
@@ -2478,6 +2566,26 @@ except Exception:
2478
2566
  local ts
2479
2567
  ts="$(date -u +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date)"
2480
2568
 
2569
+ # v7.28.0: evidence-gate inconclusive line. When the evidence gate could not
2570
+ # establish a diff baseline (no git repo, or no run-start SHA), it records a
2571
+ # durable .loki/state/evidence-inconclusive.json instead of silently passing.
2572
+ # Surface one honest line so the user knows completion was not independently
2573
+ # verified. The record is removed by the gate on any conclusive run.
2574
+ local evidence_inconclusive_line=""
2575
+ local _inc_file="$loki_dir/state/evidence-inconclusive.json"
2576
+ if [ -f "$_inc_file" ]; then
2577
+ local _inc_reason
2578
+ _inc_reason="$(python3 -c "import json,sys
2579
+ try:
2580
+ d=json.load(open(sys.argv[1]))
2581
+ print(d.get('reason','') if d.get('inconclusive') else '')
2582
+ except Exception:
2583
+ print('')" "$_inc_file" 2>/dev/null)"
2584
+ if [ -n "$_inc_reason" ]; then
2585
+ evidence_inconclusive_line="Evidence gate: inconclusive (${_inc_reason}) - completion not independently verified"
2586
+ fi
2587
+ fi
2588
+
2481
2589
  # ---- Durable human-readable file: .loki/COMPLETION.txt --------------------
2482
2590
  {
2483
2591
  echo "Loki Mode run summary"
@@ -2508,6 +2616,10 @@ except Exception:
2508
2616
  fi
2509
2617
  echo "Tasks: pending=$pending in_progress=$in_progress completed=$completed failed=$failed"
2510
2618
  echo ""
2619
+ if [ -n "$evidence_inconclusive_line" ]; then
2620
+ echo "$evidence_inconclusive_line"
2621
+ echo ""
2622
+ fi
2511
2623
  echo "Review the work:"
2512
2624
  echo " $review_cmd"
2513
2625
  echo ""
@@ -2916,7 +3028,7 @@ spawn_worktree_session() {
2916
3028
  >> "$log_file" 2>&1 || _wt_exit=$?
2917
3029
  ;;
2918
3030
  codex)
2919
- codex exec --full-auto \
3031
+ codex exec --full-auto --skip-git-repo-check \
2920
3032
  "Loki Mode: $task_prompt. Read .loki/CONTINUITY.md for context." \
2921
3033
  >> "$log_file" 2>&1 || _wt_exit=$?
2922
3034
  ;;
@@ -3117,7 +3229,7 @@ Output ONLY the resolved file content with no conflict markers. No explanations.
3117
3229
  resolution=$(claude --dangerously-skip-permissions -p "$conflict_prompt" --output-format text 2>/dev/null)
3118
3230
  ;;
3119
3231
  codex)
3120
- resolution=$(codex exec --full-auto "$conflict_prompt" 2>/dev/null)
3232
+ resolution=$(codex exec --full-auto --skip-git-repo-check "$conflict_prompt" 2>/dev/null)
3121
3233
  ;;
3122
3234
  cline)
3123
3235
  resolution=$(invoke_cline_capture "$conflict_prompt" 2>/dev/null)
@@ -4602,6 +4714,57 @@ print_ttfv_next_steps() {
4602
4714
  return 0
4603
4715
  }
4604
4716
 
4717
+ # _read_iteration_cost <iteration>
4718
+ # Emit "input output cost cache_read cache_creation" for the given iteration,
4719
+ # preferring the authoritative result-cost file written by the embedded stream
4720
+ # parser (Claude's own total_cost_usd + usage, slug/symlink-independent) over
4721
+ # the context-tracker-derived estimate in tracking.json. Falls back to
4722
+ # tracking.json when no result-cost file exists, and to all zeros otherwise.
4723
+ # Best-effort: any parse failure yields "0 0 0 0 0" and never aborts.
4724
#######################################
# Print "input output cost cache_read cache_creation" for one iteration.
# Source precedence (paths are relative to the current directory):
#   1. .loki/metrics/result-cost-<iteration>.json, when that file exists
#   2. the last matching entry in .loki/context/tracking.json per_iteration
#   3. all zeros
# Arguments:
#   $1 - iteration index (must be an integer for the tracking.json lookup)
# Outputs:
#   Exactly five space-separated numbers on one line. Missing, null,
#   non-numeric or non-finite values are reported as 0 so the line can be
#   consumed by `read` and embedded in JSON safely.
# Returns:
#   Always 0; any parse failure yields "0 0 0 0 0".
#######################################
_read_iteration_cost() {
  local iteration="${1:-}"
  local result_cost_file=".loki/metrics/result-cost-${iteration}.json"
  local tracking_file=".loki/context/tracking.json"
  local source_kind source_file

  if [ -f "$result_cost_file" ]; then
    source_kind="result"
    source_file="$result_cost_file"
  elif [ -f "$tracking_file" ]; then
    source_kind="tracking"
    source_file="$tracking_file"
  else
    echo "0 0 0 0 0"
    return 0
  fi

  # The path and iteration travel as argv, never spliced into the Python
  # source, so quotes or non-numeric text are treated as data, not code.
  python3 - "$source_kind" "$source_file" "$iteration" 2>/dev/null <<'ITER_COST_PY' || echo "0 0 0 0 0"
import json
import math
import sys

ZEROS = (0, 0, 0, 0, 0)


def num(value):
    """Return value as a finite number; anything else becomes 0."""
    if not value or isinstance(value, bool):
        return 0
    if isinstance(value, int):
        return value
    try:
        as_float = float(value)
    except (TypeError, ValueError):
        return 0
    if not math.isfinite(as_float):
        return 0
    if isinstance(value, float):
        return value
    return int(as_float) if as_float.is_integer() else as_float


def load(kind, path, iteration):
    with open(path) as fh:
        doc = json.load(fh)
    if kind == "result":
        record = doc
        cost_key = "total_cost_usd"
    else:
        wanted = int(iteration)
        matches = [e for e in doc.get("per_iteration", []) if e.get("iteration") == wanted]
        if not matches:
            return ZEROS
        record = matches[-1]
        cost_key = "cost_usd"
    keys = ("input_tokens", "output_tokens", cost_key,
            "cache_read_tokens", "cache_creation_tokens")
    return tuple(num(record.get(k)) for k in keys)


try:
    values = load(*sys.argv[1:4])
except Exception:
    values = ZEROS
print(*values)
ITER_COST_PY
}
4767
+
4605
4768
  track_iteration_complete() {
4606
4769
  local iteration="$1"
4607
4770
  local exit_code="${2:-0}"
@@ -4684,32 +4847,14 @@ track_iteration_complete() {
4684
4847
  local phase="${LAST_KNOWN_PHASE:-}"
4685
4848
  [ -z "$phase" ] && phase=$(python3 -c "import json; print(json.load(open('.loki/state/orchestrator.json')).get('currentPhase', 'unknown'))" 2>/dev/null || echo "unknown")
4686
4849
 
4687
- # Read token data from context tracker output (v5.42.0)
4850
+ # Read token data, preferring Claude's authoritative result-cost file over
4851
+ # the context-tracker estimate (v7.28.0 cost-capture fix). See
4852
+ # _read_iteration_cost for precedence rationale.
4688
4853
  # v6.82.0: also capture cache_read_tokens / cache_creation_tokens for
4689
4854
  # prompt-cache hit-rate analysis (S1.1 prompt restructure).
4690
4855
  local iter_input=0 iter_output=0 iter_cost=0
4691
4856
  local iter_cache_read=0 iter_cache_creation=0
4692
- if [ -f ".loki/context/tracking.json" ]; then
4693
- read iter_input iter_output iter_cost iter_cache_read iter_cache_creation < <(python3 -c "
4694
- import json
4695
- try:
4696
- t = json.load(open('.loki/context/tracking.json'))
4697
- iters = t.get('per_iteration', [])
4698
- match = [i for i in iters if i.get('iteration') == $iteration]
4699
- if match:
4700
- m = match[-1]
4701
- print(
4702
- m.get('input_tokens', 0),
4703
- m.get('output_tokens', 0),
4704
- m.get('cost_usd', 0),
4705
- m.get('cache_read_tokens', 0),
4706
- m.get('cache_creation_tokens', 0),
4707
- )
4708
- else:
4709
- print(0, 0, 0, 0, 0)
4710
- except: print(0, 0, 0, 0, 0)
4711
- " 2>/dev/null || echo "0 0 0 0 0")
4712
- fi
4857
+ read -r iter_input iter_output iter_cost iter_cache_read iter_cache_creation < <(_read_iteration_cost "$iteration")
4713
4858
 
4714
4859
  cat > ".loki/metrics/efficiency/iteration-${iteration}.json" << EFF_EOF
4715
4860
  {
@@ -6551,6 +6696,13 @@ print(counts[gate_name])
6551
6696
  loki_crash_friction "gate_failure" "gate=${gate_name} consecutive=${count}" >/dev/null 2>&1 || true
6552
6697
  fi
6553
6698
 
6699
+ # Trust-metrics: append a durable per-failure record so the gate-failure
6700
+ # distribution survives clear_gate_failure (which resets the running
6701
+ # counter). CRITICAL: this function's stdout IS its return value, so the
6702
+ # write is fully stdout-suppressed and best-effort; it cannot change the
6703
+ # echoed count or any gate behavior.
6704
+ record_trust_event_bash "gate_failure" "gate=${gate_name}" "consecutive=${count}" >/dev/null 2>&1 || true
6705
+
6554
6706
  echo "$count"
6555
6707
  }
6556
6708
 
@@ -7500,7 +7652,7 @@ BUILD_PROMPT
7500
7652
  --output-format text > "$review_output" 2>/dev/null
7501
7653
  ;;
7502
7654
  codex)
7503
- codex exec --full-auto "$prompt_text" \
7655
+ codex exec --full-auto --skip-git-repo-check "$prompt_text" \
7504
7656
  > "$review_output" 2>/dev/null
7505
7657
  ;;
7506
7658
  cline)
@@ -7715,7 +7867,7 @@ ADVERSARIAL_EOF
7715
7867
  ;;
7716
7868
  codex)
7717
7869
  if command -v codex &>/dev/null; then
7718
- codex exec --full-auto "$adversarial_prompt" \
7870
+ codex exec --full-auto --skip-git-repo-check "$adversarial_prompt" \
7719
7871
  > "$result_file" 2>/dev/null || true
7720
7872
  fi
7721
7873
  ;;
@@ -11900,6 +12052,19 @@ run_autonomous() {
11900
12052
  _LOKI_RUN_START_SHA="$(cat "$_start_sha_file" 2>/dev/null || echo "")"
11901
12053
  export _LOKI_RUN_START_SHA
11902
12054
 
12055
+ # Trust-metrics instrumentation marker: record one run_start event per
12056
+ # fresh run so the trust-metrics denominator counts ONLY instrumented runs.
12057
+ # This is what lets the aggregator distinguish "0 blocks measured" from
12058
+ # "this run predates instrumentation" (the central honesty rule). Additive,
12059
+ # best-effort, stdout-silent; never affects control flow. Mint a fresh
12060
+ # per-run id here and export it so every later event in this run shares it
12061
+ # (LOKI_SESSION_ID is absent on the `loki start` path).
12062
+ if [ "${ITERATION_COUNT:-0}" -eq 0 ]; then
12063
+ LOKI_TRUST_RUN_ID="$(_loki_trust_run_id --new)"
12064
+ export LOKI_TRUST_RUN_ID
12065
+ record_trust_event_bash "run_start" "start_sha=${_LOKI_RUN_START_SHA:-}" 2>/dev/null || true
12066
+ fi
12067
+
11903
12068
  # Notify dashboard of active project directory (for AI Chat cross-directory usage)
11904
12069
  if command -v curl &>/dev/null; then
11905
12070
  local project_cwd
@@ -12244,8 +12409,15 @@ except Exception as exc:
12244
12409
  claude)
12245
12410
  # Claude: Full features with stream-json output and agent tracking
12246
12411
  # Uses dynamic tier for model selection based on RARV phase
12247
- # Pass tier to Python via environment for dashboard display
12248
- { LOKI_CURRENT_MODEL="$tier_param" \
12412
+ # Pass tier + iteration to the embedded stream parser via the
12413
+ # environment. A bare `VAR=val cmd | parser` prefix applies ONLY
12414
+ # to `cmd` (claude) and does NOT cross the pipe to the parser
12415
+ # subprocess, so these must be exported into the shell env first.
12416
+ # LOKI_ITERATION lets the parser stamp the authoritative
12417
+ # result-cost file under the correct iteration index.
12418
+ export LOKI_CURRENT_MODEL="$tier_param"
12419
+ export LOKI_ITERATION="$ITERATION_COUNT"
12420
+ { \
12249
12421
  claude "${_loki_claude_argv[@]}" -p "$prompt" \
12250
12422
  --output-format stream-json --verbose 2>&1 | \
12251
12423
  tee -a "$log_file" "$agent_log" "$iter_output" | \
@@ -12558,6 +12730,34 @@ def process_stream():
12558
12730
  active_agents[orchestrator_id]["tasks_completed"].append(f"{tool_count} tools used")
12559
12731
 
12560
12732
  save_agents()
12733
+
12734
+ # Authoritative cost capture (path/slug/symlink-independent).
12735
+ # Claude'"'"'s result message carries its own total_cost_usd plus a
12736
+ # full usage object. The context-tracker session-file path is
12737
+ # brittle (slug derivation must guess Claude'"'"'s naming), so this
12738
+ # stamps the authoritative number to a per-iteration file that
12739
+ # the efficiency writer prefers. Best-effort: a malformed or
12740
+ # missing field must never break the iteration loop.
12741
+ try:
12742
+ _iter = os.environ.get("LOKI_ITERATION", "0")
12743
+ _u = data.get("usage", {}) or {}
12744
+ _rec = {
12745
+ "total_cost_usd": data.get("total_cost_usd"),
12746
+ "input_tokens": _u.get("input_tokens", 0),
12747
+ "output_tokens": _u.get("output_tokens", 0),
12748
+ "cache_read_tokens": _u.get("cache_read_input_tokens", 0),
12749
+ "cache_creation_tokens": _u.get("cache_creation_input_tokens", 0),
12750
+ }
12751
+ if _rec["total_cost_usd"] is not None:
12752
+ os.makedirs(".loki/metrics", exist_ok=True)
12753
+ _p = ".loki/metrics/result-cost-" + str(_iter) + ".json"
12754
+ _tmp = _p + ".tmp"
12755
+ with open(_tmp, "w") as _f:
12756
+ json.dump(_rec, _f)
12757
+ os.replace(_tmp, _p)
12758
+ except Exception:
12759
+ pass
12760
+
12561
12761
  print(f"\n{GREEN}[Session complete]{NC}", flush=True)
12562
12762
  is_error = data.get("is_error", False)
12563
12763
  sys.exit(1 if is_error else 0)
@@ -12586,7 +12786,7 @@ if __name__ == "__main__":
12586
12786
  # Uses dynamic tier from RARV phase (tier_param already set above)
12587
12787
  { LOKI_CODEX_REASONING_EFFORT="$tier_param" \
12588
12788
  CODEX_MODEL_REASONING_EFFORT="$tier_param" \
12589
- codex exec --full-auto \
12789
+ codex exec --full-auto --skip-git-repo-check \
12590
12790
  "$prompt" 2>&1 | tee -a "$log_file" "$agent_log" "$iter_output"; \
12591
12791
  } && exit_code=0 || exit_code=$?
12592
12792
  ;;
@@ -12990,7 +13190,36 @@ if __name__ == "__main__":
12990
13190
  case "${gate_failures:-}" in
12991
13191
  *code_review,*|*code_review_ESCALATED*) _gate_block_for_completion="code_review" ;;
12992
13192
  esac
12993
- if [ -n "$_gate_block_for_completion" ] && check_completion_promise "$iter_output"; then
13193
+ # DROP-FIX (v7.28): check_completion_promise -> check_task_completion_signal
13194
+ # CONSUMES the completion signal (rm -f) on the FIRST successful call.
13195
+ # The completion-promise chain below calls it up to five times in one
13196
+ # iteration (reverify guard, code-review arm, evidence arm, held-out
13197
+ # arm, success arm), so the first call consumed the claim and every
13198
+ # later arm saw nothing -- the success arm never fired and the run
13199
+ # iterated to max_iterations even though the agent had claimed done.
13200
+ # Fix: evaluate the claim EXACTLY ONCE here, capture it in
13201
+ # _completion_claimed, and have every arm test that variable. The
13202
+ # single call discards stdout (matching the prior call sites, which
13203
+ # also discarded it), so the task_completion_claim event still emits
13204
+ # exactly once. Consumption semantics are preserved: the claim is
13205
+ # consumed when evaluated; if a gate rejects it, the agent must
13206
+ # re-claim next iteration (see internal/DEMO-CLAIM-DROP-BUG.md).
13207
+ local _completion_claimed=0
13208
+ if check_completion_promise "$iter_output"; then
13209
+ _completion_claimed=1
13210
+ fi
13211
+ # MEDIUM-3: this completion-promise route evaluates the council hard
13212
+ # gates (evidence + held-out) without the council_evaluate freshness
13213
+ # step, so the held-out gate could read stale verification statuses
13214
+ # (and a stale reservation). Re-verify the checklist ONCE here, but
13215
+ # only when a completion claim is actually present (the same
13216
+ # _completion_claimed flag tested by the gate chain below) so
13217
+ # verification does not run every iteration. Type-guarded and
13218
+ # best-effort: failure must never block the completion path.
13219
+ if [ "$_completion_claimed" = 1 ] && type council_reverify_checklist &>/dev/null; then
13220
+ council_reverify_checklist 2>/dev/null || true
13221
+ fi
13222
+ if [ -n "$_gate_block_for_completion" ] && [ "$_completion_claimed" = 1 ]; then
12994
13223
  log_warn "Completion claim rejected: code review is BLOCKED for this iteration (Critical/High findings). Fix review issues before completion."
12995
13224
  log_warn " Review details under .loki/quality/reviews/ ; gate_failures=${gate_failures}"
12996
13225
  _gate_block_for_completion=""
@@ -13005,11 +13234,24 @@ if __name__ == "__main__":
13005
13234
  # LOKI_EVIDENCE_GATE=0 (council_evidence_gate returns 0 immediately
13006
13235
  # when disabled, so this branch never fires). Gate output (reason +
13007
13236
  # opt-out hint) is printed by council_evidence_gate itself.
13008
- elif check_completion_promise "$iter_output" && type council_evidence_gate &>/dev/null && ! council_evidence_gate; then
13237
+ elif [ "$_completion_claimed" = 1 ] && type council_evidence_gate &>/dev/null && ! council_evidence_gate; then
13009
13238
  log_warn "Completion claim rejected: evidence gate found no proof of completion (empty diff vs run-start SHA, or red tests)."
13010
13239
  log_warn " Details under .loki/council/evidence-block.json ; opt out with LOKI_EVIDENCE_GATE=0"
13011
13240
  # Fall through; keep iterating until there is real evidence.
13012
- elif check_completion_promise "$iter_output"; then
13241
+ # v7.28.0: the held-out spec-eval gate must also guard the DEFAULT
13242
+ # completion-promise route, not only the interval-gated council path
13243
+ # (council_evaluate). Otherwise an agent can self-assert "done" and
13244
+ # exit as completion_promise_fulfilled while a held-out acceptance
13245
+ # check is failing, bypassing the anti-reward-hacking gate entirely.
13246
+ # Mirrors the evidence-gate block above. Opt-out: the gate's own
13247
+ # LOKI_HELDOUT_GATE=0 (council_heldout_gate returns 0 immediately
13248
+ # when disabled or when no held-out items are reserved, so this
13249
+ # branch never fires). Gate output is printed by council_heldout_gate.
13250
+ elif [ "$_completion_claimed" = 1 ] && type council_heldout_gate &>/dev/null && ! council_heldout_gate; then
13251
+ log_warn "Completion claim rejected: held-out spec-eval gate found failing held-out acceptance check(s)."
13252
+ log_warn " Details under .loki/council/heldout-block.json ; opt out with LOKI_HELDOUT_GATE=0"
13253
+ # Fall through; keep iterating until the held-out checks pass.
13254
+ elif [ "$_completion_claimed" = 1 ]; then
13013
13255
  echo ""
13014
13256
  if [ -n "$COMPLETION_PROMISE" ]; then
13015
13257
  log_header "COMPLETION PROMISE FULFILLED: $COMPLETION_PROMISE"
@@ -13383,10 +13625,19 @@ check_human_intervention() {
13383
13625
  if [ -f "$loki_dir/signals/COUNCIL_REVIEW_REQUESTED" ]; then
13384
13626
  log_info "Council force-review requested from dashboard"
13385
13627
  rm -f "$loki_dir/signals/COUNCIL_REVIEW_REQUESTED"
13628
+ # MEDIUM-3: this route evaluates the council hard gates directly without
13629
+ # the council_evaluate freshness step, so re-verify the checklist ONCE
13630
+ # before the gate chain to restore that invariant (refreshes held-out
13631
+ # statuses and repairs a stale reservation). Type-guarded, best-effort.
13632
+ if type council_reverify_checklist &>/dev/null; then
13633
+ council_reverify_checklist 2>/dev/null || true
13634
+ fi
13386
13635
  if type council_checklist_gate &>/dev/null && ! council_checklist_gate; then
13387
13636
  log_info "Council force-review: blocked by checklist hard gate"
13388
13637
  elif type council_evidence_gate &>/dev/null && ! council_evidence_gate; then
13389
13638
  log_info "Council force-review: blocked by evidence hard gate"
13639
+ elif type council_heldout_gate &>/dev/null && ! council_heldout_gate; then
13640
+ log_info "Council force-review: blocked by held-out spec-eval hard gate"
13390
13641
  elif type council_vote &>/dev/null && council_vote; then
13391
13642
  log_header "COMPLETION COUNCIL: FORCE REVIEW - PROJECT COMPLETE"
13392
13643
  # BUG #17 fix: Write COMPLETED marker, generate council report, and