loki-mode 7.40.0 → 7.41.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/autonomy/run.sh CHANGED
@@ -2560,20 +2560,24 @@ except Exception:
2560
2560
  [ -z "$branch" ] && branch="unknown"
2561
2561
  head_sha="$( (cd "${TARGET_DIR:-.}" && git rev-parse HEAD) 2>/dev/null || true )"
2562
2562
 
2563
+ # Finding #596 (HIGH): exclude .loki/ and .git/ from the summary diff/stat and
2564
+ # from the "Review the work" command we print, so the user is never told to
2565
+ # review a .loki-bloated diff and the displayed counts match the gated diff.
2566
+ local _summary_pathspec=(-- . ':(exclude).loki/' ':(exclude).git/' ':(exclude)**/.loki/**')
2563
2567
  if [ -n "$start_sha" ]; then
2564
- diff_stat="$( (cd "${TARGET_DIR:-.}" && git diff --stat "${start_sha}..HEAD") 2>/dev/null || true )"
2568
+ diff_stat="$( (cd "${TARGET_DIR:-.}" && git diff --stat "${start_sha}..HEAD" "${_summary_pathspec[@]}") 2>/dev/null || true )"
2565
2569
  # Parse the git diff --shortstat tail for counts (locale-stable enough
2566
2570
  # for our display; failures leave the zeros in place).
2567
2571
  local shortstat
2568
- shortstat="$( (cd "${TARGET_DIR:-.}" && git diff --shortstat "${start_sha}..HEAD") 2>/dev/null || true )"
2572
+ shortstat="$( (cd "${TARGET_DIR:-.}" && git diff --shortstat "${start_sha}..HEAD" "${_summary_pathspec[@]}") 2>/dev/null || true )"
2569
2573
  if [ -n "$shortstat" ]; then
2570
2574
  files_changed="$(printf '%s\n' "$shortstat" | grep -oE '[0-9]+ file' | grep -oE '[0-9]+' | head -1)"
2571
2575
  insertions="$(printf '%s\n' "$shortstat" | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' | head -1)"
2572
2576
  deletions="$(printf '%s\n' "$shortstat" | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' | head -1)"
2573
2577
  fi
2574
- review_cmd="git diff ${start_sha}..HEAD"
2578
+ review_cmd="git diff ${start_sha}..HEAD -- . ':(exclude).loki/'"
2575
2579
  else
2576
- review_cmd="git diff HEAD"
2580
+ review_cmd="git diff HEAD -- . ':(exclude).loki/'"
2577
2581
  fi
2578
2582
  [ -z "$files_changed" ] && files_changed=0
2579
2583
  [ -z "$insertions" ] && insertions=0
@@ -3050,9 +3054,24 @@ spawn_worktree_session() {
3050
3054
  # Provider-specific invocation for parallel sessions
3051
3055
  case "${PROVIDER_NAME:-claude}" in
3052
3056
  claude)
3053
- claude --dangerously-skip-permissions \
3054
- -p "Loki Mode: $task_prompt. Read .loki/CONTINUITY.md for context." \
3055
- >> "$log_file" 2>&1 || _wt_exit=$?
3057
+ # caveman ACTIVATION (free-form): a parallel worktree dev stream
3058
+ # is free-form generation; its output goes to a log, not a parsed
3059
+ # sentinel. Activate compression at the configured level when
3060
+ # warranted (empty -> bare invocation, byte-identical to before).
3061
+ # Type-guarded; inlined per-invocation (not exported).
3062
+ local _loki_wt_cm=""
3063
+ if type loki_caveman_activate_env >/dev/null 2>&1; then
3064
+ _loki_wt_cm="$(loki_caveman_activate_env)"
3065
+ fi
3066
+ if [ -n "$_loki_wt_cm" ]; then
3067
+ CAVEMAN_DEFAULT_MODE="$_loki_wt_cm" claude --dangerously-skip-permissions \
3068
+ -p "Loki Mode: $task_prompt. Read .loki/CONTINUITY.md for context." \
3069
+ >> "$log_file" 2>&1 || _wt_exit=$?
3070
+ else
3071
+ claude --dangerously-skip-permissions \
3072
+ -p "Loki Mode: $task_prompt. Read .loki/CONTINUITY.md for context." \
3073
+ >> "$log_file" 2>&1 || _wt_exit=$?
3074
+ fi
3056
3075
  ;;
3057
3076
  codex)
3058
3077
  codex exec --full-auto --skip-git-repo-check \
@@ -3264,7 +3283,11 @@ Output ONLY the resolved file content with no conflict markers. No explanations.
3264
3283
  if type loki_subcall_bare_enabled >/dev/null 2>&1 && loki_subcall_bare_enabled; then
3265
3284
  _cr_argv+=("--bare")
3266
3285
  fi
3267
- resolution=$(claude "${_cr_argv[@]}" -p "$conflict_prompt" --output-format text 2>/dev/null)
3286
+ # caveman HARD-SUPPRESS (parsed output): the output is captured as
3287
+ # the EXACT resolved file content (the shell writes it verbatim).
3288
+ # Compressing prose into the merged source would corrupt the file,
3289
+ # so disable caveman unconditionally here. No-op when absent.
3290
+ resolution=$(CAVEMAN_DEFAULT_MODE=off claude "${_cr_argv[@]}" -p "$conflict_prompt" --output-format text 2>/dev/null)
3268
3291
  ;;
3269
3292
  codex)
3270
3293
  resolution=$(codex exec --full-auto --skip-git-repo-check "$conflict_prompt" 2>/dev/null)
@@ -7147,6 +7170,11 @@ enforce_test_coverage() {
7147
7170
  cat > "$quality_dir/test-results.json" << TREOF
7148
7171
  {"timestamp":"$(date -u +%Y-%m-%dT%H:%M:%SZ)","runner":"none","pass":true,"summary":"No test runner detected"}
7149
7172
  TREOF
7173
+ # Finding #598: stamp the per-iteration freshness marker so a later
7174
+ # completion-route capture (ensure_completion_test_evidence) reuses this
7175
+ # run instead of re-running the suite. Single source of truth for "tests
7176
+ # ran this iteration", set on every return path that writes results.
7177
+ printf '%s\n' "${ITERATION_COUNT:-0}" > "$quality_dir/.test-results.iter" 2>/dev/null || true
7150
7178
  return 0
7151
7179
  fi
7152
7180
 
@@ -7156,6 +7184,8 @@ TREOF
7156
7184
  cat > "$quality_dir/test-results.json" << TREOF
7157
7185
  {"timestamp":"$(date -u +%Y-%m-%dT%H:%M:%SZ)","runner":"$test_runner","pass":$test_passed,"min_coverage":$min_coverage,"summary":"$details"}
7158
7186
  TREOF
7187
+ # Finding #598: stamp the per-iteration freshness marker (see above).
7188
+ printf '%s\n' "${ITERATION_COUNT:-0}" > "$quality_dir/.test-results.iter" 2>/dev/null || true
7159
7189
 
7160
7190
  if [ "$test_passed" = "true" ]; then
7161
7191
  touch "$quality_dir/unit-tests.pass"
@@ -7170,6 +7200,82 @@ TREOF
7170
7200
  fi
7171
7201
  }
7172
7202
 
7203
+ # ============================================================================
7204
+ # Finding #598 (HIGH): ensure REAL test evidence exists before the
7205
+ # verified-completion evidence gate runs on a completion claim.
7206
+ #
7207
+ # The evidence gate (council_evidence_gate) blocks completion iff the diff is
7208
+ # empty OR tests are red. The test axis reads .loki/quality/test-results.json.
7209
+ # When that file is ABSENT or inconclusive (runner==none / unparsable), the gate
7210
+ # treats the test axis as pass-through, so completion could be claimed on a
7211
+ # nonzero diff alone with NO test evidence -- half-blind. This happens whenever
7212
+ # enforce_test_coverage did not run this iteration, e.g. LOKI_HARD_GATES=false or
7213
+ # PHASE_UNIT_TESTS=false (the gate at the quality-gate ladder is skipped) while
7214
+ # the completion-promise route still fires the evidence gate.
7215
+ #
7216
+ # Rather than letting absent evidence pass (Option 1, which would live in the
7217
+ # off-limits completion-council.sh), we GENERATE real evidence here (Option 2,
7218
+ # preferred + autonomous): run the project's own test command via the existing
7219
+ # detect-and-run enforce_test_coverage, which persists a fresh test-results.json.
7220
+ # The gate then reads true PASS/FAIL. If no test runner truly exists, the file
7221
+ # records runner:none and the test axis legitimately stays pass-through.
7222
+ #
7223
+ # Behavior:
7224
+ # - Default ON. Opt out with LOKI_COMPLETION_TEST_CAPTURE=0.
7225
+ # - Cheap: skips when a fresh test-results.json already exists for THIS
7226
+ # iteration (freshness marker .loki/quality/.test-results.iter), so we never
7227
+ # re-run the suite the quality-gate ladder already ran.
7228
+ # - Best-effort: enforce_test_coverage returns nonzero on red tests; that is
7229
+ # EXPECTED and must not crash the completion path. The gate is the decider,
7230
+ # so we always swallow the rc with `|| true` and let the gate read the file.
7231
+ # - CWD invariant: enforce_test_coverage writes ${TARGET_DIR}/.loki/...; the
7232
+ # gate reads .loki/... relative to CWD. Both are invoked from the same loop
7233
+ # body where CWD == TARGET_DIR (or TARGET_DIR=="."), matching the existing
7234
+ # gate call sites.
7235
+ # ============================================================================
7236
+ ensure_completion_test_evidence() {
7237
+ [ "${LOKI_COMPLETION_TEST_CAPTURE:-1}" = "0" ] && return 0
7238
+ type enforce_test_coverage &>/dev/null || return 0
7239
+
7240
+ local loki_dir="${TARGET_DIR:-.}/.loki"
7241
+ local quality_dir="$loki_dir/quality"
7242
+ local tr_file="$quality_dir/test-results.json"
7243
+ local iter_marker="$quality_dir/.test-results.iter"
7244
+ local this_iter="${ITERATION_COUNT:-0}"
7245
+
7246
+ # Freshness guard: if results already exist for this iteration, reuse them.
7247
+ if [ -f "$tr_file" ] && [ -f "$iter_marker" ]; then
7248
+ local marked
7249
+ marked="$(cat "$iter_marker" 2>/dev/null || echo "")"
7250
+ if [ "$marked" = "$this_iter" ]; then
7251
+ log_info "Completion test evidence: reusing this iteration's test-results.json"
7252
+ return 0
7253
+ fi
7254
+ fi
7255
+
7256
+ log_info "Completion test evidence: capturing fresh test results before evidence gate (opt out: LOKI_COMPLETION_TEST_CAPTURE=0)"
7257
+ # Record the test-results.json mtime BEFORE capture so we only mark this
7258
+ # iteration "fresh" if enforce_test_coverage actually (re)wrote the file.
7259
+ # Guards LOW-2 (bug-hunt): if capture is interrupted before writing while a
7260
+ # prior-iteration file exists, the marker must NOT advance and let stale
7261
+ # evidence read as fresh. Window is narrow but the check is cheap.
7262
+ local _results_file="$quality_dir/test-results.json"
7263
+ local _mtime_before=""
7264
+ [ -f "$_results_file" ] && _mtime_before=$(stat -f %m "$_results_file" 2>/dev/null || stat -c %Y "$_results_file" 2>/dev/null || echo "")
7265
+ # The gate decides on the persisted file; a red suite (nonzero rc) is expected
7266
+ # and must not abort the completion path here.
7267
+ enforce_test_coverage || true
7268
+ mkdir -p "$quality_dir" 2>/dev/null || true
7269
+ local _mtime_after=""
7270
+ [ -f "$_results_file" ] && _mtime_after=$(stat -f %m "$_results_file" 2>/dev/null || stat -c %Y "$_results_file" 2>/dev/null || echo "")
7271
+ # Only advance the freshness marker when the results file was actually
7272
+ # produced/updated by THIS capture (mtime advanced or file newly created).
7273
+ if [ -n "$_mtime_after" ] && [ "$_mtime_after" != "$_mtime_before" ]; then
7274
+ printf '%s\n' "$this_iter" > "$iter_marker" 2>/dev/null || true
7275
+ fi
7276
+ return 0
7277
+ }
7278
+
7173
7279
  # ============================================================================
7174
7280
  # Documentation Staleness Check (v6.75.0)
7175
7281
  # Checks if generated documentation is stale relative to HEAD
@@ -7233,7 +7339,11 @@ run_doc_quality_gate() {
7233
7339
  fi
7234
7340
  else
7235
7341
  score=$((score - 10))
7236
- issues+=("No generated documentation found (run 'loki docs generate')")
7342
+ if [ "${LOKI_AUTO_DOCS:-true}" = "true" ]; then
7343
+ issues+=("No generated documentation found (auto-generation did not complete)")
7344
+ else
7345
+ issues+=("No generated documentation found (auto-docs disabled; run 'loki docs generate')")
7346
+ fi
7237
7347
  fi
7238
7348
 
7239
7349
  # Check 3: Package documentation (for npm/pip packages)
@@ -7258,6 +7368,52 @@ run_doc_quality_gate() {
7258
7368
  [ "$score" -ge 70 ]
7259
7369
  }
7260
7370
 
7371
+ # ============================================================================
7372
+ # Auto-Documentation Generation (intelligent default)
7373
+ # Generates the .loki/docs/ suite before the documentation gate evaluates so
7374
+ # the gate scores on real generated docs instead of nagging the user to run
7375
+ # 'loki docs generate' by hand. Default-on; opt out with LOKI_AUTO_DOCS=false.
7376
+ #
7377
+ # Bounded: runs at most once per run when docs are missing, and again only
7378
+ # when the existing docs are >10 commits stale (the same threshold the gate
7379
+ # and staleness check use). 'loki docs generate' writes its manifest
7380
+ # unconditionally (template fallback when no provider), so the missing-docs
7381
+ # trigger fires exactly once. Best-effort: never fails the iteration loop.
7382
+ # ============================================================================
7383
+
7384
auto_generate_docs_if_needed() {
  # Generate the .loki/docs/ suite when it is missing or substantially stale.
  #
  # Globals (read): LOKI_AUTO_DOCS (anything other than "true" disables this),
  #                 TARGET_DIR (project root, defaults to "."),
  #                 SCRIPT_DIR (directory expected to contain the `loki` CLI).
  # Calls:          python3, git, "$SCRIPT_DIR/loki docs generate", log_info,
  #                 log_warn.
  # Returns:        always 0 -- best-effort; a failed generation is only logged.
  [ "${LOKI_AUTO_DOCS:-true}" = "true" ] || return 0

  local project_dir="${TARGET_DIR:-.}"
  local manifest="$project_dir/.loki/docs/docs-manifest.json"
  local needs_gen=false

  if [ ! -f "$manifest" ]; then
    needs_gen=true
  else
    # Regenerate only when the existing docs are more than 10 commits behind
    # HEAD. The manifest path is handed to Python as an argument rather than
    # spliced into the program text, so quotes or backslashes in the path
    # cannot break (or inject into) the snippet. A null/missing git_sha maps
    # to the empty string.
    local doc_sha=""
    doc_sha=$(python3 -c 'import json, sys
print(json.load(open(sys.argv[1])).get("git_sha") or "")' "$manifest" 2>/dev/null) || doc_sha=""
    if [ -n "$doc_sha" ]; then
      local behind
      behind=$(git -C "$project_dir" rev-list --count "$doc_sha..HEAD" 2>/dev/null || echo "0")
      # Anything that is not a plain non-negative integer counts as "not stale"
      # instead of tripping the numeric comparison below.
      case "$behind" in
        ''|*[!0-9]*) behind=0 ;;
      esac
      if [ "$behind" -gt 10 ]; then
        needs_gen=true
      fi
    fi
  fi

  [ "$needs_gen" = "true" ] || return 0

  local loki_bin="${SCRIPT_DIR:-}/loki"
  [ -x "$loki_bin" ] || return 0

  log_info "Auto-documentation: generating .loki/docs/ before documentation gate..."
  # Runs synchronously so the docs exist before the caller moves on to the
  # documentation checks.
  if ! "$loki_bin" docs generate "$project_dir" >/dev/null 2>&1; then
    log_warn "Auto-documentation: generation did not complete (gate will score on what exists)"
  fi
  return 0
}
7416
+
7261
7417
  # ============================================================================
7262
7418
  # Magic Modules Debate Gate - Gate 12 (v6.77.0)
7263
7419
  # Runs when any .loki/magic/specs/*.md changed since last iteration.
@@ -7564,16 +7720,28 @@ run_code_review() {
7564
7720
  review_id="review-$(date -u +%Y%m%dT%H%M%SZ)-${ITERATION_COUNT:-0}"
7565
7721
  mkdir -p "$review_dir/$review_id"
7566
7722
 
7567
- # Get diff from last commit (staged changes)
7723
+ # Get diff from last commit (staged changes).
7724
+ #
7725
+ # Finding #596 (HIGH): exclude .loki/ and .git/ from the review diff via git
7726
+ # pathspec. When .loki/ is git-tracked the diff bloats (observed 2.18MB of
7727
+ # runtime state), the reviewer prompt overflows, the model returns EMPTY, and
7728
+ # every reviewer records NO_OUTPUT -> the gate passes with ZERO real review.
7729
+ # The evidence gate already excludes .loki/ (see the grep -vE near the
7730
+ # porcelain read); mirror that here so the code-review gate is never defeated
7731
+ # by Loki's own state. Behavior is identical when .loki/ is untracked (the
7732
+ # common case) because git ignores the exclude pathspec for paths it does not
7733
+ # track. ':(exclude).git/' is harmless (git never diffs .git/) and is kept
7734
+ # only for parity with the evidence-gate exclusion list.
7735
+ local _review_pathspec=(-- . ':(exclude).loki/' ':(exclude).git/' ':(exclude)**/.loki/**')
7568
7736
  local diff_content
7569
- diff_content=$(git -C "${TARGET_DIR:-.}" diff HEAD~1 2>/dev/null || git -C "${TARGET_DIR:-.}" diff --cached 2>/dev/null || echo "")
7737
+ diff_content=$(git -C "${TARGET_DIR:-.}" diff HEAD~1 "${_review_pathspec[@]}" 2>/dev/null || git -C "${TARGET_DIR:-.}" diff --cached "${_review_pathspec[@]}" 2>/dev/null || echo "")
7570
7738
  if [ -z "$diff_content" ]; then
7571
7739
  log_info "Code review: No diff to review, skipping"
7572
7740
  return 0
7573
7741
  fi
7574
7742
 
7575
7743
  local changed_files
7576
- changed_files=$(git -C "${TARGET_DIR:-.}" diff --name-only HEAD~1 2>/dev/null || git -C "${TARGET_DIR:-.}" diff --name-only --cached 2>/dev/null || echo "")
7744
+ changed_files=$(git -C "${TARGET_DIR:-.}" diff --name-only HEAD~1 "${_review_pathspec[@]}" 2>/dev/null || git -C "${TARGET_DIR:-.}" diff --name-only --cached "${_review_pathspec[@]}" 2>/dev/null || echo "")
7577
7745
 
7578
7746
  log_header "CODE REVIEW: $review_id"
7579
7747
 
@@ -7912,6 +8080,15 @@ BUILD_PROMPT
7912
8080
  if type loki_review_allowlist_enabled >/dev/null 2>&1 && loki_review_allowlist_enabled; then
7913
8081
  _rv_argv+=("--allowedTools" "$(loki_review_allowlist)")
7914
8082
  fi
8083
+ # caveman HARD-SUPPRESS (parsed output): this is a trust-gate
8084
+ # subcall whose output is parsed for "^VERDICT:" + findings. A
8085
+ # globally-active caveman would compress/reword that line and
8086
+ # silently flip the verdict, so we UNCONDITIONALLY disable
8087
+ # caveman here with CAVEMAN_DEFAULT_MODE=off (the activate hook
8088
+ # then deletes its flag and emits nothing). Set inline, not via
8089
+ # the helper, so the carve-out holds even when the helper is
8090
+ # out of scope. No-op when caveman is absent.
8091
+ CAVEMAN_DEFAULT_MODE=off \
7915
8092
  claude "${_rv_argv[@]}" -p "$prompt_text" \
7916
8093
  --output-format text > "$review_output" 2>/dev/null
7917
8094
  ;;
@@ -7952,6 +8129,14 @@ BUILD_PROMPT
7952
8129
  local pass_count=0
7953
8130
  local fail_count=0
7954
8131
  local verdicts_summary=""
8132
+ # Finding #596 FIX A2 (HIGH): count REAL verdicts (a reviewer file that
8133
+ # exists, is non-empty, AND carries a recognized VERDICT: PASS|FAIL line).
8134
+ # A review where every reviewer produced no usable verdict (all NO_OUTPUT,
8135
+ # e.g. the model returned EMPTY because a .loki-bloated prompt overflowed)
8136
+ # must NOT silently pass with pass_count=0/fail_count=0/has_blocking=false.
8137
+ # Such a review proves nothing, so we treat it as INCONCLUSIVE -> blocking.
8138
+ local real_verdict_count=0
8139
+ local no_output_count=0
7955
8140
 
7956
8141
  for i in $(seq 0 $((reviewer_count - 1))); do
7957
8142
  local reviewer_name
@@ -7961,6 +8146,7 @@ BUILD_PROMPT
7961
8146
  if [ ! -f "$review_output" ] || [ ! -s "$review_output" ]; then
7962
8147
  log_warn "Reviewer $reviewer_name produced no output"
7963
8148
  verdicts_summary="${verdicts_summary}${reviewer_name}:NO_OUTPUT "
8149
+ ((no_output_count++))
7964
8150
  continue
7965
8151
  fi
7966
8152
 
@@ -7968,6 +8154,22 @@ BUILD_PROMPT
7968
8154
  local verdict
7969
8155
  verdict=$(grep -i "^VERDICT:" "$review_output" | head -1 | sed 's/^VERDICT:[[:space:]]*//' | tr '[:lower:]' '[:upper:]' | tr -d '[:space:]')
7970
8156
 
8157
+ # FIX A2: a "real verdict" is the PRESENCE of a non-empty VERDICT: line,
8158
+ # not a specific token. A non-empty file with NO VERDICT line (garbage or
8159
+ # a truncated reply) previously counted as PASS and could approve the gate
8160
+ # on a meaningless file; now it is a non-verdict (not real, not a pass).
8161
+ # We deliberately keep the original non-FAIL=pass semantics for any file
8162
+ # that DOES carry a verdict line (PASS, APPROVE, "PASS with concerns",
8163
+ # etc. all count as pass) so verbose-but-real verdicts are never
8164
+ # false-blocked. The only added block relative to shipped behavior is the
8165
+ # zero-real-verdicts (all-empty) case.
8166
+ if [ -z "$verdict" ]; then
8167
+ log_warn "Reviewer $reviewer_name produced no VERDICT line (empty or unparseable reply)"
8168
+ verdicts_summary="${verdicts_summary}${reviewer_name}:NO_VERDICT "
8169
+ ((no_output_count++))
8170
+ continue
8171
+ fi
8172
+ ((real_verdict_count++))
7971
8173
  if [ "$verdict" = "FAIL" ]; then
7972
8174
  ((fail_count++))
7973
8175
  # Check for Critical/High severity findings
@@ -7984,6 +8186,23 @@ BUILD_PROMPT
7984
8186
  verdicts_summary="${verdicts_summary}${reviewer_name}:${verdict:-UNKNOWN} "
7985
8187
  done
7986
8188
 
8189
+ # Finding #596 FIX A2: zero real verdicts when reviewers were expected =>
8190
+ # INCONCLUSIVE => blocking. Optional bounded retry first (LOKI_REVIEW_RETRY=1,
8191
+ # default on) so a transient empty-output blip does not hard-block; the retry
8192
+ # re-runs the whole review with the (now .loki-excluded) diff. Opt out of the
8193
+ # block entirely with LOKI_REVIEW_INCONCLUSIVE_BLOCK=0 (records, never blocks).
8194
+ local review_inconclusive=false
8195
+ if [ "$reviewer_count" -gt 0 ] && [ "$real_verdict_count" -eq 0 ]; then
8196
+ review_inconclusive=true
8197
+ log_error "CODE REVIEW INCONCLUSIVE: 0 of $reviewer_count reviewers returned a usable verdict (no_output=$no_output_count)"
8198
+ log_error " An all-empty review proves nothing; refusing to pass the gate on zero real verdicts."
8199
+ if [ "${LOKI_REVIEW_RETRY:-1}" = "1" ] && [ "${_LOKI_REVIEW_RETRYING:-0}" != "1" ]; then
8200
+ log_warn " Retrying code review once (LOKI_REVIEW_RETRY=1)..."
8201
+ _LOKI_REVIEW_RETRYING=1 run_code_review
8202
+ return $?
8203
+ fi
8204
+ fi
8205
+
7987
8206
  # Save aggregate results via python3 + env vars (no shell interpolation in JSON)
7988
8207
  export LOKI_REVIEW_AGG_FILE="$review_dir/$review_id/aggregate.json"
7989
8208
  export LOKI_REVIEW_AGG_ID="$review_id"
@@ -7992,6 +8211,8 @@ BUILD_PROMPT
7992
8211
  export LOKI_REVIEW_AGG_FAIL="$fail_count"
7993
8212
  export LOKI_REVIEW_AGG_BLOCKING="$has_blocking"
7994
8213
  export LOKI_REVIEW_AGG_VERDICTS="$verdicts_summary"
8214
+ export LOKI_REVIEW_AGG_REAL="$real_verdict_count"
8215
+ export LOKI_REVIEW_AGG_INCONCLUSIVE="$review_inconclusive"
7995
8216
  python3 << 'AGG_SCRIPT'
7996
8217
  import json, os
7997
8218
  result = {
@@ -8000,6 +8221,8 @@ result = {
8000
8221
  "pass_count": int(os.environ["LOKI_REVIEW_AGG_PASS"]),
8001
8222
  "fail_count": int(os.environ["LOKI_REVIEW_AGG_FAIL"]),
8002
8223
  "has_blocking": os.environ["LOKI_REVIEW_AGG_BLOCKING"] == "true",
8224
+ "real_verdict_count": int(os.environ["LOKI_REVIEW_AGG_REAL"]),
8225
+ "inconclusive": os.environ["LOKI_REVIEW_AGG_INCONCLUSIVE"] == "true",
8003
8226
  "verdicts": os.environ["LOKI_REVIEW_AGG_VERDICTS"].strip()
8004
8227
  }
8005
8228
  with open(os.environ["LOKI_REVIEW_AGG_FILE"], "w") as f:
@@ -8007,12 +8230,15 @@ with open(os.environ["LOKI_REVIEW_AGG_FILE"], "w") as f:
8007
8230
  AGG_SCRIPT
8008
8231
  unset LOKI_REVIEW_AGG_FILE LOKI_REVIEW_AGG_ID LOKI_REVIEW_AGG_ITER
8009
8232
  unset LOKI_REVIEW_AGG_PASS LOKI_REVIEW_AGG_FAIL LOKI_REVIEW_AGG_BLOCKING LOKI_REVIEW_AGG_VERDICTS
8233
+ unset LOKI_REVIEW_AGG_REAL LOKI_REVIEW_AGG_INCONCLUSIVE
8010
8234
 
8011
8235
  emit_event_json "code_review_complete" \
8012
8236
  "review_id=$review_id" \
8013
8237
  "pass_count=$pass_count" \
8014
8238
  "fail_count=$fail_count" \
8015
8239
  "has_blocking=$has_blocking" \
8240
+ "real_verdict_count=$real_verdict_count" \
8241
+ "inconclusive=$review_inconclusive" \
8016
8242
  "iteration=$ITERATION_COUNT"
8017
8243
 
8018
8244
  # Anti-sycophancy check: unanimous PASS is suspicious
@@ -8031,6 +8257,20 @@ AGG_SCRIPT
8031
8257
  return 1
8032
8258
  fi
8033
8259
 
8260
+ # Finding #596 FIX A2: an inconclusive review (zero real verdicts, retry
8261
+ # already exhausted or disabled) blocks unless explicitly opted out. This is
8262
+ # the 'verified before done' promise: a review that produced no usable verdict
8263
+ # cannot stand in for a real review.
8264
+ if [ "$review_inconclusive" = "true" ]; then
8265
+ if [ "${LOKI_REVIEW_INCONCLUSIVE_BLOCK:-1}" = "0" ]; then
8266
+ log_warn "Code review inconclusive (0/$reviewer_count real verdicts) but LOKI_REVIEW_INCONCLUSIVE_BLOCK=0 - not blocking"
8267
+ return 0
8268
+ fi
8269
+ log_error "CODE REVIEW BLOCKED: inconclusive (0/$reviewer_count reviewers returned a usable verdict)"
8270
+ log_error " Review details: $review_dir/$review_id/ ; opt out with LOKI_REVIEW_INCONCLUSIVE_BLOCK=0"
8271
+ return 1
8272
+ fi
8273
+
8034
8274
  log_info "Code review passed ($pass_count/$reviewer_count PASS, $fail_count FAIL - no blocking issues)"
8035
8275
  return 0
8036
8276
  }
@@ -8151,6 +8391,11 @@ ADVERSARIAL_EOF
8151
8391
  if type loki_review_allowlist_enabled >/dev/null 2>&1 && loki_review_allowlist_enabled; then
8152
8392
  _adv_argv+=("--allowedTools" "$(loki_review_allowlist)")
8153
8393
  fi
8394
+ # caveman HARD-SUPPRESS (parsed output): the adversarial probe's
8395
+ # output is parsed for findings/severity. Disable caveman
8396
+ # unconditionally (CAVEMAN_DEFAULT_MODE=off) so compression cannot
8397
+ # drop or reword a finding. No-op when caveman is absent.
8398
+ CAVEMAN_DEFAULT_MODE=off \
8154
8399
  claude "${_adv_argv[@]}" -p "$adversarial_prompt" \
8155
8400
  --output-format text > "$result_file" 2>/dev/null || true
8156
8401
  fi
@@ -10188,9 +10433,14 @@ ${_commits}"
10188
10433
  _ic_argv+=("--bare")
10189
10434
  fi
10190
10435
  _ic_argv+=("--model" "haiku")
10436
+ # caveman HARD-SUPPRESS (parsed output): the regen output is validated to be
10437
+ # Markdown (grep '^#') and written verbatim to USAGE.md. Compressed prose
10438
+ # would fail that check or produce caveman-style USAGE text, so disable
10439
+ # caveman unconditionally. Inlined on `claude` only (does not cross the pipe
10440
+ # to head). No-op when caveman is absent.
10191
10441
  local _ic_out
10192
10442
  _ic_out=$(printf '%s' "$_ic_prompt" \
10193
- | timeout 60 claude "${_ic_argv[@]}" -p - 2>/dev/null \
10443
+ | timeout 60 env CAVEMAN_DEFAULT_MODE=off claude "${_ic_argv[@]}" -p - 2>/dev/null \
10194
10444
  | head -200)
10195
10445
  # Sanity check: response must look like Markdown (starts with # or ##).
10196
10446
  if [ -z "$_ic_out" ] || ! printf '%s' "$_ic_out" | head -1 | grep -qE '^#'; then
@@ -13049,9 +13299,40 @@ except Exception as exc:
13049
13299
  # result-cost file under the correct iteration index.
13050
13300
  export LOKI_CURRENT_MODEL="$tier_param"
13051
13301
  export LOKI_ITERATION="$ITERATION_COUNT"
13302
+ # caveman ACTIVATION (free-form): the main RARV dev loop is
13303
+ # free-form generation, so we ask caveman to compress its OUTPUT
13304
+ # tokens at the configured level. Inlined as a per-invocation env
13305
+ # prefix (NOT exported) so it applies ONLY to `claude` (and the
13306
+ # SessionStart hook it spawns inherits it) and never bleeds into
13307
+ # later parsed subcalls. Empty when caveman is unsupported /
13308
+ # disabled / the legacy completion-prose match is active, in which
13309
+ # case the invocation is byte-identical to before. Type-guarded so
13310
+ # an older runtime without the helper degrades cleanly.
13311
+ local _loki_cm_level=""
13312
+ if type loki_caveman_activate_env >/dev/null 2>&1; then
13313
+ _loki_cm_level="$(loki_caveman_activate_env)"
13314
+ fi
13315
+ # Best-effort one-time bootstrap when activation is warranted but
13316
+ # caveman is not yet installed (idempotent, non-blocking, clean
13317
+ # degrade). The level stays usable next run even if this run is
13318
+ # uncompressed.
13319
+ if [ -n "$_loki_cm_level" ] && type loki_caveman_bootstrap >/dev/null 2>&1; then
13320
+ loki_caveman_bootstrap || true
13321
+ fi
13322
+ # NOTE: an EMPTY CAVEMAN_DEFAULT_MODE is NOT inert -- caveman's
13323
+ # getDefaultMode() treats empty as unset and falls back to the
13324
+ # user's global default (often "full"). So when activation is not
13325
+ # warranted we must NOT set the var at all (the bare branch),
13326
+ # keeping the invocation byte-identical to pre-caveman behavior.
13052
13327
  { \
13328
+ if [ -n "$_loki_cm_level" ]; then
13329
+ CAVEMAN_DEFAULT_MODE="$_loki_cm_level" \
13330
+ claude "${_loki_claude_argv[@]}" -p "$prompt" \
13331
+ --output-format stream-json --verbose 2>&1
13332
+ else
13053
13333
  claude "${_loki_claude_argv[@]}" -p "$prompt" \
13054
- --output-format stream-json --verbose 2>&1 | \
13334
+ --output-format stream-json --verbose 2>&1
13335
+ fi | \
13055
13336
  tee -a "$log_file" "$agent_log" "$iter_output" | \
13056
13337
  python3 -u -c '
13057
13338
  import sys
@@ -13689,6 +13970,12 @@ if __name__ == "__main__":
13689
13970
  fi
13690
13971
  fi
13691
13972
  fi
13973
+ # Auto-generate docs (default-on) BEFORE the staleness check and the
13974
+ # gate, so neither nags the user to run 'loki docs generate' by hand.
13975
+ # Opt out with LOKI_AUTO_DOCS=false.
13976
+ if [ "$ITERATION_COUNT" -gt 0 ]; then
13977
+ auto_generate_docs_if_needed
13978
+ fi
13692
13979
  # Documentation staleness check (v6.75.0)
13693
13980
  if [ "$ITERATION_COUNT" -gt 0 ]; then
13694
13981
  run_doc_staleness_check
@@ -13789,6 +14076,15 @@ if __name__ == "__main__":
13789
14076
  # Completion Council check (v5.25.0) - multi-agent voting on completion
13790
14077
  # Runs before completion promise check since council is more comprehensive
13791
14078
  log_step "Post-iteration: checking completion council..."
14079
+ # Finding #598 (HIGH): council_should_stop calls council_evidence_gate
14080
+ # internally; ensure fresh test evidence exists first so its test axis
14081
+ # is not half-blind when the quality-gate ladder did not run this
14082
+ # iteration (e.g. LOKI_HARD_GATES=false). Idempotent: the freshness
14083
+ # guard reuses results the ladder already wrote in the common case, so
14084
+ # tests are never run twice per iteration. Best-effort, never blocks.
14085
+ if type ensure_completion_test_evidence &>/dev/null; then
14086
+ ensure_completion_test_evidence || true
14087
+ fi
13792
14088
  if type council_should_stop &>/dev/null && council_should_stop; then
13793
14089
  echo ""
13794
14090
  log_header "COMPLETION COUNCIL: PROJECT COMPLETE"
@@ -13851,6 +14147,15 @@ if __name__ == "__main__":
13851
14147
  if [ "$_completion_claimed" = 1 ] && type council_reverify_checklist &>/dev/null; then
13852
14148
  council_reverify_checklist 2>/dev/null || true
13853
14149
  fi
14150
+ # Finding #598 (HIGH): generate real test evidence before the evidence
14151
+ # gate fires, so the gate's test axis is never half-blind on absent
14152
+ # test-results.json. Only on an actual completion claim (mirrors the
14153
+ # reverify guard above) so the suite does not run every iteration.
14154
+ # Type-guarded + best-effort: never blocks the completion path itself;
14155
+ # the evidence gate below is the decider that reads the file.
14156
+ if [ "$_completion_claimed" = 1 ] && type ensure_completion_test_evidence &>/dev/null; then
14157
+ ensure_completion_test_evidence || true
14158
+ fi
13854
14159
  if [ -n "$_gate_block_for_completion" ] && [ "$_completion_claimed" = 1 ]; then
13855
14160
  log_warn "Completion claim rejected: code review is BLOCKED for this iteration (Critical/High findings). Fix review issues before completion."
13856
14161
  log_warn " Review details under .loki/quality/reviews/ ; gate_failures=${gate_failures}"
@@ -14130,6 +14435,89 @@ kill_provider_child() {
14130
14435
  return 1
14131
14436
  }
14132
14437
 
14438
# Print the pid stored on the first line of the file $1.
#
# All whitespace is stripped (spaces, tabs, and the CR left behind by a
# CRLF-terminated file), and the value must be a plain decimal number.
# Arguments: $1 - path to a pid/pgid file
# Outputs:   the pid on stdout (nothing when invalid)
# Returns:   0 when a numeric pid was printed, 1 otherwise
_reap_read_pid_file() {
    local _file="$1" _line=""
    [ -f "$_file" ] && [ -r "$_file" ] || return 1
    # read returns non-zero when the trailing newline is missing, but it has
    # still filled _line, so the status is deliberately ignored.
    IFS= read -r _line < "$_file" || true
    _line="${_line//[[:space:]]/}"
    case "$_line" in ''|*[!0-9]*) return 1 ;; esac
    printf '%s\n' "$_line"
}

# Send signal $1 to every member of process group $2 that is not listed in $3.
#
# Arguments: $1 - signal name without the dash (TERM, KILL)
#            $2 - numeric process group id
#            $3 - protected pids as a space-delimited string with a leading
#                 and trailing space, e.g. " 12 34 "
# Returns:   0 when at least one signal was delivered, 1 otherwise
_reap_signal_group() {
    local _sig="$1" _group="$2" _skip="$3" _gp _sent=1
    # Word-splitting of the substitution is intentional: awk prints one
    # all-digit pid per line.
    for _gp in $(ps -axo pid=,pgid= 2>/dev/null | awk -v g="$_group" '$2==g{print $1}'); do
        case "$_skip" in *" $_gp "*) continue ;; esac
        kill "-$_sig" "$_gp" 2>/dev/null && _sent=0
    done
    return "$_sent"
}

# Authoritative self-reap of THIS run's process group on a normal completion.
#
# Why this exists: a normal completion (council stop / max-iterations /
# completion promise) returns from run_autonomous() into main()'s cleanup
# block, which reaps the app-runner but NOT the orchestrator's own process
# group. The provider agent (claude/codex/...) and any subagents it spawned
# share the orchestrator's group; one that outlived the `exit` kept consuming
# CPU as an orphan (observed: ~27 min). The external `loki stop` path already
# reaps the whole group via the recorded pgid; this brings the same reap to
# the completion path so a clean finish leaves no orphans.
#
# Foreign-run safety (CRITICAL): the reap is scoped to the pgid THIS run
# recorded in its loki.pgid file and never does a name-based `pkill` sweep.
# It additionally refuses to act unless that recorded pgid equals the pgid of
# the running shell ($$), so a stale or copied pgid file cannot direct signals
# at a group we do not own. The pgid file is only written when the runner has
# its own session (LOKI_OWN_SESSION=1); in interactive foreground it is absent
# and this function is a no-op, leaving Ctrl+C semantics and the user's shell
# untouched.
#
# Globals:   TARGET_DIR (read), LOKI_SESSION_ID (read), HOME (read, optional)
# Arguments: none
# Returns:   always 0 (best-effort; every failure path is a silent skip)
reap_own_process_group() {
    local loki_dir="${TARGET_DIR:-.}/.loki"

    # The pgid file sits next to the pid file: global, or per-session when a
    # session id is set.
    local _reap_pgid_file="$loki_dir/loki.pgid"
    if [ -n "${LOKI_SESSION_ID:-}" ]; then
        _reap_pgid_file="$loki_dir/sessions/${LOKI_SESSION_ID}/loki.pgid"
    fi
    [ -f "$_reap_pgid_file" ] || return 0   # interactive / no own session: skip

    local _pgid
    _pgid=$(_reap_read_pid_file "$_reap_pgid_file") || return 0
    [ "$_pgid" -gt 1 ] 2>/dev/null || return 0   # never touch pgid 0/1

    # Safety: the recorded pgid MUST be our own group. If it is not (stale file
    # from a prior run, copied tree), refuse -- killing a group we do not own
    # could hit unrelated processes.
    local _my_pgid
    _my_pgid=$(ps -o pgid= -p "$$" 2>/dev/null | tr -d '[:space:]') || return 0
    if [ -z "$_my_pgid" ] || [ "$_pgid" != "$_my_pgid" ]; then
        return 0
    fi

    # Collect protected pids (this shell, registered children, dashboards) so
    # the reap never takes down the shared dashboard if it happens to share our
    # group. The list keeps a leading and trailing space so " <pid> " matches
    # whole pids only.
    local _protected=" $$ "
    local _pf _p
    if [ -d "$loki_dir/pids" ]; then
        # Registry entries named <pid>.json: the pid is the file name.
        for _pf in "$loki_dir/pids"/*.json; do
            [ -f "$_pf" ] || continue
            _p=$(basename "$_pf" .json)
            case "$_p" in ''|*[!0-9]*) continue ;; esac
            _protected="${_protected}${_p} "
        done
        # Plain pid files: the pid is the first line of the content.
        for _pf in "$loki_dir/pids"/*.pid; do
            _p=$(_reap_read_pid_file "$_pf") || continue
            _protected="${_protected}${_p} "
        done
    fi

    # Project-local dashboard, plus the per-user one when HOME is known. HOME
    # is expanded with a default so an unset HOME cannot abort under `set -u`.
    local _dash_files=("$loki_dir/dashboard/dashboard.pid")
    if [ -n "${HOME:-}" ]; then
        _dash_files+=("${HOME}/.loki/dashboard/dashboard.pid")
    fi
    for _pf in "${_dash_files[@]}"; do
        # Same strict reader as above: a CRLF or padded dashboard.pid must
        # still yield a pid that matches the ps output, or the dashboard
        # would silently lose its protection.
        _p=$(_reap_read_pid_file "$_pf") || continue
        _protected="${_protected}${_p} "
    done

    # Per-pid TERM then KILL of group members, EXCLUDING $$ (so main() survives
    # to finish its remaining cleanup and exit normally) and protected pids. A
    # blanket `kill -- -PGID` cannot exclude members, hence the per-pid sweep.
    _reap_signal_group TERM "$_pgid" "$_protected" || return 0   # nothing to reap
    sleep 1   # grace period before escalating
    _reap_signal_group KILL "$_pgid" "$_protected" || true
    return 0
}
14520
+
14133
14521
  # Check for human intervention signals
14134
14522
  check_human_intervention() {
14135
14523
  local loki_dir="${TARGET_DIR:-.}/.loki"
@@ -14264,6 +14652,11 @@ check_human_intervention() {
14264
14652
  if type council_reverify_checklist &>/dev/null; then
14265
14653
  council_reverify_checklist 2>/dev/null || true
14266
14654
  fi
14655
+ # Finding #598 (HIGH): generate real test evidence before the force-review
14656
+ # evidence gate, so the test axis is not half-blind on absent results.
14657
+ if type ensure_completion_test_evidence &>/dev/null; then
14658
+ ensure_completion_test_evidence || true
14659
+ fi
14267
14660
  if type council_checklist_gate &>/dev/null && ! council_checklist_gate; then
14268
14661
  log_info "Council force-review: blocked by checklist hard gate"
14269
14662
  elif type council_evidence_gate &>/dev/null && ! council_evidence_gate; then
@@ -15136,13 +15529,22 @@ main() {
15136
15529
  app_runner_cleanup
15137
15530
  fi
15138
15531
  stop_status_monitor
15532
+ # v7.41.x: authoritatively reap THIS run's process group on a normal
15533
+ # completion (council stop / max-iterations / completion promise), the same
15534
+ # group reap the STOP signal does. Without this, a provider agent that
15535
+ # detached or reparented survived the exit and ran as an orphan (~27 min in
15536
+ # the reported brownfield run). pgid-scoped to .loki/loki.pgid + excludes
15537
+ # $$/dashboard, so it cannot touch a foreign loki run. No-ops in interactive
15538
+ # foreground (no own session => no pgid file), preserving Ctrl+C semantics.
15539
+ reap_own_process_group 2>/dev/null || true
15139
15540
  local loki_dir="${TARGET_DIR:-.}/.loki"
15140
- rm -f "$loki_dir/loki.pid" 2>/dev/null
15541
+ rm -f "$loki_dir/loki.pid" "$loki_dir/loki.pgid" 2>/dev/null
15141
15542
  # UT2-13: Clear cli-provider marker on normal session end.
15142
15543
  rm -f "$loki_dir/state/cli-provider" 2>/dev/null || true
15143
15544
  # Clean up per-session PID file if running with session ID
15144
15545
  if [ -n "${LOKI_SESSION_ID:-}" ]; then
15145
- rm -f "$loki_dir/sessions/${LOKI_SESSION_ID}/loki.pid" 2>/dev/null
15546
+ rm -f "$loki_dir/sessions/${LOKI_SESSION_ID}/loki.pid" \
15547
+ "$loki_dir/sessions/${LOKI_SESSION_ID}/loki.pgid" 2>/dev/null
15146
15548
  fi
15147
15549
  # Mark session.json as stopped
15148
15550
  if [ -f "$loki_dir/session.json" ]; then
@@ -7,7 +7,7 @@ Modules:
7
7
  control: Session control API (start/stop/pause/resume)
8
8
  """
9
9
 
10
- __version__ = "7.40.0"
10
+ __version__ = "7.41.1"
11
11
 
12
12
  # Expose the control app for easy import
13
13
  try: