loki-mode 7.45.0 → 7.46.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -12
- package/SKILL.md +5 -5
- package/VERSION +1 -1
- package/autonomy/CONSTITUTION.md +9 -2
- package/autonomy/lib/sentrux-gate.sh +1 -1
- package/autonomy/loki +2 -2
- package/autonomy/run.sh +355 -92
- package/dashboard/__init__.py +1 -1
- package/dashboard/registry.py +156 -62
- package/dashboard/server.py +9 -10
- package/docs/COMPARISON.md +10 -10
- package/docs/COMPETITIVE-ANALYSIS.md +1 -1
- package/docs/INSTALLATION.md +2 -2
- package/docs/P0-SWEEP-PLAN.md +163 -0
- package/docs/architecture/STATE-MACHINES.md +18 -19
- package/docs/architecture/bmad-loki-voice-agent-council-analysis.md +1 -1
- package/docs/auto-claude-comparison.md +14 -11
- package/docs/certification/01-core-concepts/lesson.md +12 -11
- package/docs/certification/01-core-concepts/quiz.md +6 -6
- package/docs/certification/05-troubleshooting/lesson.md +23 -13
- package/docs/certification/05-troubleshooting/quiz.md +3 -3
- package/docs/certification/answer-key.md +2 -2
- package/docs/certification/certification-exam.md +9 -9
- package/docs/competitive/bolt-new-analysis.md +1 -1
- package/docs/competitive/emergence-others-analysis.md +9 -9
- package/docs/competitive/replit-lovable-analysis.md +3 -3
- package/docs/cursor-comparison.md +15 -12
- package/docs/dashboard-guide.md +9 -7
- package/docs/prd-purple-lab-platform-v2.md +1 -1
- package/docs/prd-purple-lab-platform.md +3 -3
- package/docs/show-hn-post.md +2 -2
- package/loki-ts/dist/loki.js +2 -2
- package/mcp/__init__.py +1 -1
- package/package.json +2 -2
- package/plugins/loki-mode/.claude-plugin/plugin.json +2 -2
- package/plugins/loki-mode/README.md +1 -1
- package/references/magic-rarv-integration.md +1 -1
- package/references/quality-control.md +5 -5
- package/references/sdlc-phases.md +1 -2
- package/skills/00-index.md +1 -1
- package/skills/artifacts.md +1 -1
- package/skills/healing.md +1 -1
- package/skills/magic-modules.md +3 -3
- package/skills/quality-gates.md +52 -39
- package/skills/testing.md +1 -1
package/autonomy/run.sh
CHANGED
|
@@ -7035,7 +7035,6 @@ enforce_test_coverage() {
|
|
|
7035
7035
|
|
|
7036
7036
|
local min_coverage="${LOKI_MIN_COVERAGE:-80}"
|
|
7037
7037
|
local test_passed=true
|
|
7038
|
-
local coverage_pct=0
|
|
7039
7038
|
local test_runner="none"
|
|
7040
7039
|
local details=""
|
|
7041
7040
|
|
|
@@ -7262,12 +7261,12 @@ TREOF
|
|
|
7262
7261
|
if [ "$test_passed" = "true" ]; then
|
|
7263
7262
|
touch "$quality_dir/unit-tests.pass"
|
|
7264
7263
|
rm -f "$loki_dir/signals/TESTS_FAILED" 2>/dev/null || true
|
|
7265
|
-
log_info "Test
|
|
7264
|
+
log_info "Test suite gate: $test_runner passed"
|
|
7266
7265
|
return 0
|
|
7267
7266
|
else
|
|
7268
7267
|
rm -f "$quality_dir/unit-tests.pass"
|
|
7269
7268
|
echo "tests_failed" > "$loki_dir/signals/TESTS_FAILED" 2>/dev/null || true
|
|
7270
|
-
log_warn "Test
|
|
7269
|
+
log_warn "Test suite gate: $test_runner FAILED"
|
|
7271
7270
|
return 1
|
|
7272
7271
|
fi
|
|
7273
7272
|
}
|
|
@@ -7380,7 +7379,7 @@ run_doc_staleness_check() {
|
|
|
7380
7379
|
}
|
|
7381
7380
|
|
|
7382
7381
|
# ============================================================================
|
|
7383
|
-
# Documentation Quality Gate - Gate
|
|
7382
|
+
# Documentation Quality Gate - Gate 7 (Documentation Coverage)
|
|
7384
7383
|
# Checks README, documentation freshness, and package API docs
|
|
7385
7384
|
# ============================================================================
|
|
7386
7385
|
|
|
@@ -7533,6 +7532,139 @@ run_magic_debate_gate() {
|
|
|
7533
7532
|
return 0
|
|
7534
7533
|
}
|
|
7535
7534
|
|
|
7535
|
+
# ============================================================================
# Mock Integrity Gate (P0-3): wire tests/detect-mock-problems.sh as a blocking
# gate. The detector scans test files for mock patterns that mask real failures
# (tautological assertions, inline-mock-only tests, conditional/empty bodies,
# high internal-mock ratios). Invoked with --strict so it exits 1 iff CRITICAL
# or HIGH findings exist; MED/LOW never block (they are routed to a findings
# file for next-iteration injection). Opt out with LOKI_GATE_MOCK=false.
#
# Scan-target note: the wrapper sets LOKI_SCAN_DIR at the detector invocation,
# and the detector honors it (tests/detect-mock-problems.sh:23), so the gate
# scans the target project, not the loki-mode tree. When LOKI_SCAN_DIR is unset
# the detector falls back to its own repo (the default for loki-mode's own test
# run); the wrapper always sets it, so the target is what gets scanned. The
# detector is started from inside TARGET_DIR, so a relative TARGET_DIR is
# passed as "." instead of being applied a second time.
#
# Globals:  TARGET_DIR (read, default "."), SCRIPT_DIR (read),
#           LOKI_GATE_TIMEOUT (read, seconds, default 300)
# Outputs:  $TARGET_DIR/.loki/quality/mock-findings.txt is written on a block
#           or on MED/LOW advisories, and removed otherwise.
# Returns:  1 when the detector exits 1 (CRITICAL/HIGH found); 0 on pass or on
#           any inconclusive run (detector missing, timed out, or exited with
#           a status other than 0/1).
# ============================================================================
enforce_mock_integrity() {
    local target_dir="${TARGET_DIR:-.}"
    local loki_dir="$target_dir/.loki"
    local quality_dir="$loki_dir/quality"
    mkdir -p "$quality_dir"
    local findings_file="$quality_dir/mock-findings.txt"
    local detector="$SCRIPT_DIR/../tests/detect-mock-problems.sh"
    local gate_timeout="${LOKI_GATE_TIMEOUT:-300}"

    if [ ! -f "$detector" ]; then
        log_info "Mock integrity gate: detector not found, skipping (inconclusive)"
        rm -f "$findings_file" 2>/dev/null || true
        return 0
    fi

    # The detector runs after `cd "$target_dir"`. An absolute TARGET_DIR is
    # still valid there; a relative one would resolve against the new working
    # directory (proj -> proj/proj), so it is replaced by ".".
    local scan_dir="."
    case "$target_dir" in
        /*) scan_dir="$target_dir" ;;
    esac

    # `timeout` is not installed everywhere (e.g. stock macOS). Without this
    # probe the call fails with 127, which used to be reported as a
    # CRITICAL/HIGH block on every iteration.
    local timeout_cmd=""
    if command -v timeout >/dev/null 2>&1; then
        timeout_cmd="timeout"
    elif command -v gtimeout >/dev/null 2>&1; then
        timeout_cmd="gtimeout"
    fi

    local output rc=0
    if [ -n "$timeout_cmd" ]; then
        output=$(cd "$target_dir" && LOKI_SCAN_DIR="$scan_dir" \
            "$timeout_cmd" "$gate_timeout" bash "$detector" --strict 2>&1) || rc=$?
    else
        log_warn "Mock integrity gate: no timeout utility found -- running detector without a time limit"
        output=$(cd "$target_dir" && LOKI_SCAN_DIR="$scan_dir" \
            bash "$detector" --strict 2>&1) || rc=$?
    fi

    # timeout exit 124 -- treat as inconclusive (do not block on a hang)
    if [ "$rc" -eq 124 ]; then
        log_warn "Mock integrity gate: detector timed out after ${gate_timeout}s -- inconclusive"
        rm -f "$findings_file" 2>/dev/null || true
        return 0
    fi

    if [ "$rc" -eq 1 ]; then
        # --strict exits 1 iff CRITICAL or HIGH found. Persist per-finding text.
        {
            echo "# Mock integrity findings (CRITICAL/HIGH block this iteration)"
            printf '%s\n' "$output" | grep -E '\[(CRITICAL|HIGH|MEDIUM|LOW)\]' || true
        } > "$findings_file"
        log_warn "Mock integrity gate: CRITICAL/HIGH mock problems detected -- BLOCK"
        return 1
    fi

    if [ "$rc" -ne 0 ]; then
        # Any other status (126/127 launch failure, detector crash, signal) is
        # not a finding. Blocking here would claim mock problems that were
        # never detected and leave nothing actionable in the findings file.
        log_warn "Mock integrity gate: detector exited with unexpected status $rc -- inconclusive"
        rm -f "$findings_file" 2>/dev/null || true
        return 0
    fi

    # Pass: record any MED/LOW findings for injection, then clear the block file.
    local med_low
    med_low=$(printf '%s\n' "$output" | grep -E '\[(MEDIUM|LOW)\]' || true)
    if [ -n "$med_low" ]; then
        {
            echo "# Mock integrity advisory findings (MED/LOW, non-blocking)"
            echo "$med_low"
        } > "$findings_file"
    else
        rm -f "$findings_file" 2>/dev/null || true
    fi
    log_info "Mock integrity gate: PASS"
    return 0
}
|
|
7599
|
+
|
|
7600
|
+
# ============================================================================
# Test Mutation Integrity Gate (P0-3): wire tests/detect-test-mutations.sh as a
# blocking gate. The detector flags assertion-value mutations that look like
# test-fitting (tests changed to match buggy output). We do NOT pass --strict:
# --strict blocks on ANY finding (over-blocks on MED/LOW). Instead we parse
# stdout and block only when a [HIGH] line is present; MED/LOW are routed to a
# findings file for next-iteration injection. Opt out with LOKI_GATE_MUTATION=false.
#
# Scan-target note: same as the mock gate -- the wrapper sets LOKI_SCAN_DIR and
# the detector honors it (tests/detect-test-mutations.sh:33), so the gate scans
# the target project, not the loki-mode tree. The Check-5 git history is also
# read from that directory. The detector is started from inside TARGET_DIR, so
# a relative TARGET_DIR is passed as "." instead of being applied a second time.
#
# Globals:  TARGET_DIR (read, default "."), SCRIPT_DIR (read),
#           LOKI_GATE_TIMEOUT (read, seconds, default 300)
# Outputs:  $TARGET_DIR/.loki/quality/mutation-findings.txt is written on a
#           block or on MED/LOW advisories, and removed otherwise.
# Returns:  1 when the detector output contains at least one [HIGH] line;
#           0 on pass or on an inconclusive run (detector missing / timed out).
# ============================================================================
enforce_mutation_integrity() {
    local target_dir="${TARGET_DIR:-.}"
    local loki_dir="$target_dir/.loki"
    local quality_dir="$loki_dir/quality"
    mkdir -p "$quality_dir"
    local findings_file="$quality_dir/mutation-findings.txt"
    local detector="$SCRIPT_DIR/../tests/detect-test-mutations.sh"
    local gate_timeout="${LOKI_GATE_TIMEOUT:-300}"

    if [ ! -f "$detector" ]; then
        log_info "Mutation integrity gate: detector not found, skipping (inconclusive)"
        rm -f "$findings_file" 2>/dev/null || true
        return 0
    fi

    # The detector runs after `cd "$target_dir"`. An absolute TARGET_DIR is
    # still valid there; a relative one would resolve against the new working
    # directory (proj -> proj/proj), so it is replaced by ".".
    local scan_dir="."
    case "$target_dir" in
        /*) scan_dir="$target_dir" ;;
    esac

    # `timeout` is not installed everywhere (e.g. stock macOS). Without this
    # probe the call fails with 127, no [HIGH] line is produced, and the gate
    # would log PASS without the detector ever having run.
    local timeout_cmd=""
    if command -v timeout >/dev/null 2>&1; then
        timeout_cmd="timeout"
    elif command -v gtimeout >/dev/null 2>&1; then
        timeout_cmd="gtimeout"
    fi

    local output rc=0
    # No --strict: it over-blocks on MED/LOW. Decide on [HIGH] lines instead.
    if [ -n "$timeout_cmd" ]; then
        output=$(cd "$target_dir" && LOKI_SCAN_DIR="$scan_dir" \
            "$timeout_cmd" "$gate_timeout" bash "$detector" 2>&1) || rc=$?
    else
        log_warn "Mutation integrity gate: no timeout utility found -- running detector without a time limit"
        output=$(cd "$target_dir" && LOKI_SCAN_DIR="$scan_dir" \
            bash "$detector" 2>&1) || rc=$?
    fi

    # timeout exit 124 -- treat as inconclusive (do not block on a hang)
    if [ "$rc" -eq 124 ]; then
        log_warn "Mutation integrity gate: detector timed out after ${gate_timeout}s -- inconclusive"
        rm -f "$findings_file" 2>/dev/null || true
        return 0
    fi

    # grep -c always prints a count ("0" when nothing matches) but exits 1 in
    # that case; `|| true` keeps the no-match case from tripping set -e.
    local high_count
    high_count=$(printf '%s\n' "$output" | grep -c '\[HIGH\]' || true)
    high_count="${high_count:-0}"

    if [ "$high_count" -gt 0 ]; then
        {
            echo "# Test mutation findings (HIGH blocks this iteration)"
            printf '%s\n' "$output" | grep -E '\[(HIGH|MEDIUM|MED|LOW)\]' || true
        } > "$findings_file"
        log_warn "Mutation integrity gate: $high_count HIGH test-fitting finding(s) -- BLOCK"
        return 1
    fi

    # Pass: route any MED/LOW findings to injection file, else clear it.
    local med_low
    med_low=$(printf '%s\n' "$output" | grep -E '\[(MEDIUM|MED|LOW)\]' || true)
    if [ -n "$med_low" ]; then
        {
            echo "# Test mutation advisory findings (MED/LOW, non-blocking)"
            echo "$med_low"
        } > "$findings_file"
    else
        rm -f "$findings_file" 2>/dev/null || true
    fi
    log_info "Mutation integrity gate: PASS"
    return 0
}
|
|
7667
|
+
|
|
7536
7668
|
# ============================================================================
|
|
7537
7669
|
# 3-Reviewer Parallel Code Review (v5.35.0)
|
|
7538
7670
|
# Specialist pool from skills/quality-gates.md with blind review
|
|
@@ -7785,6 +7917,97 @@ MANAGED_REVIEW
|
|
|
7785
7917
|
return 0
|
|
7786
7918
|
}
|
|
7787
7919
|
|
|
7920
|
+
# _dispatch_reviewer: the one place a single reviewer prompt is handed to the
# configured provider. run_code_review's blind-council loop and the
# Devil's-Advocate re-review (P0-4) both call it, so the load-bearing claude
# trust guards (no --model/Fable routing, --bare, --disallowedTools, caveman
# OFF) cannot drift apart between two hand-written dispatchers.
# Args:    $1 = prompt text, $2 = output file path (receives the model reply).
# Globals: PROVIDER_NAME (read, default "claude").
# Returns: exit status of the provider command that was run.
_dispatch_reviewer() {
    local prompt="$1"
    local reply_file="$2"
    local provider="${PROVIDER_NAME:-claude}"

    if [ "$provider" = "claude" ]; then
        # SECURITY-REVIEW MODEL GUARD (evidence-based routing, item 4b):
        # no --model is passed, so reviewers run on the account default model
        # and are NEVER routed to Fable by a mid-flight model override or
        # LOKI_FABLE_ARCHITECT (those only rewrite the iteration's tier_param,
        # not this dispatch). This must stay true. The official model-config
        # docs CONTRADICT routing security review to Fable: its safety
        # classifiers refuse cybersecurity content, and in non-interactive
        # (-p) mode a flagged request ends the turn with stop_reason
        # "refusal" instead of a transparent Opus re-run. A refused security
        # reviewer would return no VERDICT and break the unanimous-council
        # gate. Defensive-cyber capability lives in Mythos 5 (Project
        # Glasswing), not Fable. If a future change adds --model here, the
        # security-sentinel reviewer must be pinned to opus, never fable.
        #
        # EMBED 2 + 3 (v7.33.0): this is a trust-gate council subcall. The
        # prompt is fully self-contained (diff, changed files, checks, strict
        # VERDICT/FINDINGS output format), the reply is captured to
        # $reply_file, and the call deliberately does NOT pass --model or go
        # through buildAutoFlags.
        local claude_flags=("--dangerously-skip-permissions")

        # EMBED 2 (--bare): no hooks/LSP/CLAUDE.md/MCP discovery is needed,
        # so --bare is safe and cheaper. Opt out LOKI_BARE_SUBCALLS=0.
        if type loki_subcall_bare_enabled >/dev/null 2>&1; then
            if loki_subcall_bare_enabled; then
                claude_flags+=("--bare")
            fi
        fi

        # EMBED 3 (--disallowedTools): raise the cost of a reviewer casually
        # mutating the tree (a parallel agent once ran `git reset --hard` and
        # wiped uncommitted work). Deny Edit/Write/NotebookEdit + git mutation
        # forms (incl. the git -C / --git-dir evasions); read-only git stays
        # allowed. Guardrail, not a sandbox -- echo>/sed -i/etc. remain; the
        # real net is commit-before-agent-wave. Opt out
        # LOKI_REVIEW_TOOL_GUARD=0. See loki_review_guard_denylist.
        if type loki_review_guard_enabled >/dev/null 2>&1; then
            if loki_review_guard_enabled; then
                claude_flags+=("--disallowedTools" "$(loki_review_guard_denylist)")
            fi
        fi

        # EMBED 3b (--allowedTools, #167): positive least-privilege allowlist.
        # DEFAULT OFF (opt-in LOKI_REVIEW_ALLOWLIST=1). Emitted ALONGSIDE the
        # denylist: verified live (claude 2.1.177) that deny precedence holds
        # even under --dangerously-skip-permissions, so the denylist still
        # hard-blocks mutations while this narrows the surface to read/inspect
        # tools. See loki_review_allowlist.
        if type loki_review_allowlist_enabled >/dev/null 2>&1; then
            if loki_review_allowlist_enabled; then
                claude_flags+=("--allowedTools" "$(loki_review_allowlist)")
            fi
        fi

        # caveman HARD-SUPPRESS (parsed output): the reply is parsed for
        # "^VERDICT:" + findings. A globally-active caveman would
        # compress/reword that line and silently flip the verdict, so caveman
        # is UNCONDITIONALLY disabled with CAVEMAN_DEFAULT_MODE=off (the
        # activate hook then deletes its flag and emits nothing). Set inline,
        # not via the helper, so the carve-out holds even when the helper is
        # out of scope. No-op when caveman is absent.
        CAVEMAN_DEFAULT_MODE=off \
            claude "${claude_flags[@]}" -p "$prompt" \
            --output-format text > "$reply_file" 2>/dev/null
    elif [ "$provider" = "codex" ]; then
        codex exec --full-auto --skip-git-repo-check "$prompt" \
            > "$reply_file" 2>/dev/null
    elif [ "$provider" = "cline" ]; then
        invoke_cline_capture "$prompt" \
            > "$reply_file" 2>/dev/null
    elif [ "$provider" = "aider" ]; then
        invoke_aider_capture "$prompt" \
            > "$reply_file" 2>/dev/null
    else
        # Unknown provider: record a synthetic PASS with a Low note so the
        # aggregator still finds a parseable verdict.
        echo "VERDICT: PASS" > "$reply_file"
        echo "FINDINGS:" >> "$reply_file"
        echo "- [Low] Unknown provider, review skipped" >> "$reply_file"
    fi
}
|
|
8010
|
+
|
|
7788
8011
|
run_code_review() {
|
|
7789
8012
|
local loki_dir="${TARGET_DIR:-.}/.loki"
|
|
7790
8013
|
local review_dir="$loki_dir/quality/reviews"
|
|
@@ -7873,7 +8096,7 @@ MANAGED_SELECTION
|
|
|
7873
8096
|
# Select specialists via keyword scoring (python3 reads files, not env vars)
|
|
7874
8097
|
# Loads from agents/types.json when available, falls back to hardcoded pool (v6.7.0)
|
|
7875
8098
|
# v7.4.20: gate legacy-healing-auditor on healing-mode signals to match
|
|
7876
|
-
# the documented contract in skills/quality-gates.md (
|
|
8099
|
+
# the documented contract in skills/quality-gates.md (conditional backward-compat auditor, not one of the 8 numbered gates).
|
|
7877
8100
|
local healing_active="false"
|
|
7878
8101
|
if [ "${LOKI_HEAL_MODE:-}" = "true" ] || [ "${LOKI_HEAL_MODE:-}" = "1" ]; then
|
|
7879
8102
|
healing_active="true"
|
|
@@ -7969,7 +8192,7 @@ if files_path and os.path.exists(files_path):
|
|
|
7969
8192
|
search_text = diff_text + " " + files_text
|
|
7970
8193
|
|
|
7971
8194
|
# v7.4.20: gate legacy-healing-auditor on healing-mode signals to match
|
|
7972
|
-
# skills/quality-gates.md (
|
|
8195
|
+
# skills/quality-gates.md (conditional backward-compat auditor, not one of the 8 numbered gates) which documents it as conditional. The
|
|
7973
8196
|
# auditor BLOCKs on missing characterization tests / missing adapters, which
|
|
7974
8197
|
# is a contract a greenfield project never agreed to maintain. agentbudget
|
|
7975
8198
|
# regression: the auditor pinned 9 of 10 iterations to forced PAUSE because
|
|
@@ -8097,91 +8320,11 @@ BUILD_PROMPT
|
|
|
8097
8320
|
|
|
8098
8321
|
log_step "Dispatching reviewer: $reviewer_name"
|
|
8099
8322
|
|
|
8100
|
-
# Launch blind review in background (
|
|
8323
|
+
# Launch blind review in background (shared dispatch helper).
|
|
8101
8324
|
(
|
|
8102
8325
|
local prompt_text
|
|
8103
8326
|
prompt_text=$(cat "$review_prompt_file")
|
|
8104
|
-
|
|
8105
|
-
claude)
|
|
8106
|
-
# SECURITY-REVIEW MODEL GUARD (evidence-based routing, item 4b):
|
|
8107
|
-
# Reviewers deliberately do NOT pass --model, so they run on
|
|
8108
|
-
# the account default model and are NEVER routed to Fable by a
|
|
8109
|
-
# mid-flight model override or LOKI_FABLE_ARCHITECT (those only
|
|
8110
|
-
# rewrite the iteration's tier_param, not this dispatch). This
|
|
8111
|
-
# must stay true. The official model-config docs CONTRADICT
|
|
8112
|
-
# routing security review to Fable: Fable's safety classifiers
|
|
8113
|
-
# refuse cybersecurity content, and in non-interactive (-p)
|
|
8114
|
-
# mode a flagged request ends the turn with stop_reason
|
|
8115
|
-
# "refusal" instead of a transparent Opus re-run. A refused
|
|
8116
|
-
# security reviewer would return no VERDICT and break the
|
|
8117
|
-
# unanimous-council gate. Defensive-cyber capability lives in
|
|
8118
|
-
# Mythos 5 (Project Glasswing), not Fable. If a future change
|
|
8119
|
-
# adds --model here, the security-sentinel reviewer must be
|
|
8120
|
-
# pinned to opus, never fable.
|
|
8121
|
-
# EMBED 2 + 3 (v7.33.0). This is a 3-reviewer council
|
|
8122
|
-
# subcall. $prompt_text is fully self-contained (built above
|
|
8123
|
-
# into $review_prompt_file with the diff, changed files,
|
|
8124
|
-
# checks, and strict VERDICT/FINDINGS output format), output
|
|
8125
|
-
# is captured to $review_output, and it deliberately does NOT
|
|
8126
|
-
# pass --model or go through buildAutoFlags. So:
|
|
8127
|
-
# EMBED 2 (--bare): the prompt needs no hooks/LSP/CLAUDE.md/
|
|
8128
|
-
# MCP discovery, so --bare is safe and cheaper. Opt out
|
|
8129
|
-
# LOKI_BARE_SUBCALLS=0.
|
|
8130
|
-
# EMBED 3 (--disallowedTools): raise the cost of a reviewer
|
|
8131
|
-
# casually mutating the tree (a parallel agent once ran
|
|
8132
|
-
# `git reset --hard` and wiped uncommitted work). Deny
|
|
8133
|
-
# Edit/Write/NotebookEdit + git mutation forms (incl. the
|
|
8134
|
-
# git -C / --git-dir evasions); read-only git stays allowed.
|
|
8135
|
-
# Guardrail, not a sandbox -- echo>/sed -i/etc. remain; the
|
|
8136
|
-
# real net is commit-before-agent-wave. Opt out
|
|
8137
|
-
# LOKI_REVIEW_TOOL_GUARD=0. See loki_review_guard_denylist.
|
|
8138
|
-
local _rv_argv=("--dangerously-skip-permissions")
|
|
8139
|
-
if type loki_subcall_bare_enabled >/dev/null 2>&1 && loki_subcall_bare_enabled; then
|
|
8140
|
-
_rv_argv+=("--bare")
|
|
8141
|
-
fi
|
|
8142
|
-
if type loki_review_guard_enabled >/dev/null 2>&1 && loki_review_guard_enabled; then
|
|
8143
|
-
_rv_argv+=("--disallowedTools" "$(loki_review_guard_denylist)")
|
|
8144
|
-
fi
|
|
8145
|
-
# EMBED 3b (--allowedTools, #167): positive least-privilege
|
|
8146
|
-
# allowlist. DEFAULT OFF (opt-in LOKI_REVIEW_ALLOWLIST=1).
|
|
8147
|
-
# Emitted ALONGSIDE the denylist: verified live (claude
|
|
8148
|
-
# 2.1.177) that deny precedence holds even under
|
|
8149
|
-
# --dangerously-skip-permissions, so the denylist still
|
|
8150
|
-
# hard-blocks mutations while this narrows the surface to
|
|
8151
|
-
# read/inspect tools. See loki_review_allowlist.
|
|
8152
|
-
if type loki_review_allowlist_enabled >/dev/null 2>&1 && loki_review_allowlist_enabled; then
|
|
8153
|
-
_rv_argv+=("--allowedTools" "$(loki_review_allowlist)")
|
|
8154
|
-
fi
|
|
8155
|
-
# caveman HARD-SUPPRESS (parsed output): this is a trust-gate
|
|
8156
|
-
# subcall whose output is parsed for "^VERDICT:" + findings. A
|
|
8157
|
-
# globally-active caveman would compress/reword that line and
|
|
8158
|
-
# silently flip the verdict, so we UNCONDITIONALLY disable
|
|
8159
|
-
# caveman here with CAVEMAN_DEFAULT_MODE=off (the activate hook
|
|
8160
|
-
# then deletes its flag and emits nothing). Set inline, not via
|
|
8161
|
-
# the helper, so the carve-out holds even when the helper is
|
|
8162
|
-
# out of scope. No-op when caveman is absent.
|
|
8163
|
-
CAVEMAN_DEFAULT_MODE=off \
|
|
8164
|
-
claude "${_rv_argv[@]}" -p "$prompt_text" \
|
|
8165
|
-
--output-format text > "$review_output" 2>/dev/null
|
|
8166
|
-
;;
|
|
8167
|
-
codex)
|
|
8168
|
-
codex exec --full-auto --skip-git-repo-check "$prompt_text" \
|
|
8169
|
-
> "$review_output" 2>/dev/null
|
|
8170
|
-
;;
|
|
8171
|
-
cline)
|
|
8172
|
-
invoke_cline_capture "$prompt_text" \
|
|
8173
|
-
> "$review_output" 2>/dev/null
|
|
8174
|
-
;;
|
|
8175
|
-
aider)
|
|
8176
|
-
invoke_aider_capture "$prompt_text" \
|
|
8177
|
-
> "$review_output" 2>/dev/null
|
|
8178
|
-
;;
|
|
8179
|
-
*)
|
|
8180
|
-
echo "VERDICT: PASS" > "$review_output"
|
|
8181
|
-
echo "FINDINGS:" >> "$review_output"
|
|
8182
|
-
echo "- [Low] Unknown provider, review skipped" >> "$review_output"
|
|
8183
|
-
;;
|
|
8184
|
-
esac
|
|
8327
|
+
_dispatch_reviewer "$prompt_text" "$review_output"
|
|
8185
8328
|
) &
|
|
8186
8329
|
pids+=($!)
|
|
8187
8330
|
register_pid "$!" "code-reviewer" "name=$reviewer_name"
|
|
@@ -8314,12 +8457,108 @@ AGG_SCRIPT
|
|
|
8314
8457
|
"iteration=$ITERATION_COUNT"
|
|
8315
8458
|
|
|
8316
8459
|
# Anti-sycophancy check: unanimous PASS is suspicious
|
|
8317
|
-
if [ "$pass_count" -eq "$reviewer_count" ] && [ "$fail_count" -eq 0 ]; then
|
|
8460
|
+
if [ "$pass_count" -eq "$reviewer_count" ] && [ "$fail_count" -eq 0 ] && [ "$reviewer_count" -gt 0 ]; then
|
|
8318
8461
|
log_warn "ANTI-SYCOPHANCY: All $reviewer_count reviewers passed unanimously"
|
|
8319
8462
|
log_warn "Devil's advocate note: Unanimous approval may indicate insufficient scrutiny"
|
|
8320
8463
|
log_warn "Consider manual review of $review_dir/$review_id/"
|
|
8321
8464
|
echo "UNANIMOUS_PASS: All reviewers approved - potential sycophancy risk" \
|
|
8322
8465
|
>> "$review_dir/$review_id/anti-sycophancy.txt"
|
|
8466
|
+
|
|
8467
|
+
# P0-4: Devil's-Advocate re-review. The bare warning above was INERT --
|
|
8468
|
+
# it never changed the verdict. Now, on unanimous PASS, dispatch ONE
|
|
8469
|
+
# additional adversarial reviewer (reusing _dispatch_reviewer so the
|
|
8470
|
+
# same trust guards + provider routing apply) whose sole job is to find
|
|
8471
|
+
# a Critical/High issue the unanimous council missed. If it does, we set
|
|
8472
|
+
# has_blocking=true so the EXISTING blocking decision below fires and the
|
|
8473
|
+
# gate returns 1. Runs in the FOREGROUND (no &) so has_blocking mutates
|
|
8474
|
+
# this (parent) shell, not a subshell. Opt out LOKI_GATE_DEVILS_ADVOCATE=false.
|
|
8475
|
+
if [ "${LOKI_GATE_DEVILS_ADVOCATE:-true}" = "true" ]; then
|
|
8476
|
+
log_info "Devil's Advocate: re-reviewing unanimous PASS for missed Critical/High issues..."
|
|
8477
|
+
local da_output="$review_dir/$review_id/devils-advocate.txt"
|
|
8478
|
+
local da_prompt_file="$review_dir/$review_id/devils-advocate-prompt.txt"
|
|
8479
|
+
export LOKI_DA_PROMPT_DIFF_FILE="$diff_file"
|
|
8480
|
+
export LOKI_DA_PROMPT_FILES_FILE="$files_file"
|
|
8481
|
+
export LOKI_DA_PROMPT_OUT="$da_prompt_file"
|
|
8482
|
+
python3 << 'BUILD_DA_PROMPT'
|
|
8483
|
+
import os
|
|
8484
|
+
|
|
8485
|
+
with open(os.environ["LOKI_DA_PROMPT_FILES_FILE"], "r") as f:
|
|
8486
|
+
files = f.read().strip()
|
|
8487
|
+
with open(os.environ["LOKI_DA_PROMPT_DIFF_FILE"], "r") as f:
|
|
8488
|
+
diff = f.read().strip()
|
|
8489
|
+
|
|
8490
|
+
prompt = f"""You are a Devil's Advocate reviewer. Three independent reviewers ALL approved this change. Unanimous approval is a red flag for insufficient scrutiny. Your SOLE job is to find a Critical or High severity issue they missed.
|
|
8491
|
+
|
|
8492
|
+
Be adversarial and concrete. Hunt for: security holes, data loss, race conditions, broken error handling, silent failures, off-by-one and boundary bugs, resource leaks, injection, and logic that does not match intent. Do NOT rubber-stamp. If after genuine effort you find no Critical/High issue, say so honestly -- do not invent one.
|
|
8493
|
+
|
|
8494
|
+
Files changed:
|
|
8495
|
+
{files}
|
|
8496
|
+
|
|
8497
|
+
Diff:
|
|
8498
|
+
{diff}
|
|
8499
|
+
|
|
8500
|
+
Output format (STRICT - follow exactly):
|
|
8501
|
+
VERDICT: PASS or FAIL
|
|
8502
|
+
FINDINGS:
|
|
8503
|
+
- [severity] description (file:line)
|
|
8504
|
+
Severity levels: Critical, High, Medium, Low
|
|
8505
|
+
|
|
8506
|
+
Output VERDICT: FAIL only if you found a real Critical or High issue. Otherwise output VERDICT: PASS."""
|
|
8507
|
+
|
|
8508
|
+
with open(os.environ["LOKI_DA_PROMPT_OUT"], "w") as f:
|
|
8509
|
+
f.write(prompt)
|
|
8510
|
+
BUILD_DA_PROMPT
|
|
8511
|
+
unset LOKI_DA_PROMPT_DIFF_FILE LOKI_DA_PROMPT_FILES_FILE LOKI_DA_PROMPT_OUT
|
|
8512
|
+
|
|
8513
|
+
local da_prompt_text
|
|
8514
|
+
da_prompt_text=$(cat "$da_prompt_file")
|
|
8515
|
+
# Foreground (no &) so a Critical/High finding can set has_blocking
|
|
8516
|
+
# in THIS shell. || true so a non-zero CLI exit under set -e does not
|
|
8517
|
+
# abort the gate; a missing/empty reply is treated as no finding.
|
|
8518
|
+
_dispatch_reviewer "$da_prompt_text" "$da_output" || true
|
|
8519
|
+
|
|
8520
|
+
if [ -f "$da_output" ] && [ -s "$da_output" ]; then
|
|
8521
|
+
local da_verdict
|
|
8522
|
+
da_verdict=$(grep -i "^VERDICT:" "$da_output" | head -1 | sed 's/^VERDICT:[[:space:]]*//' | tr '[:lower:]' '[:upper:]' | tr -d '[:space:]')
|
|
8523
|
+
if [ "$da_verdict" = "FAIL" ] && grep -qiE "\[(Critical|High)\]" "$da_output"; then
|
|
8524
|
+
has_blocking=true
|
|
8525
|
+
# Audit accuracy: aggregate.json was written above (line ~8429)
|
|
8526
|
+
# with has_blocking=false (entering this block requires a
|
|
8527
|
+
# unanimous PASS, so the field was necessarily false). The DA
|
|
8528
|
+
# only ever raises it false->true, so patch the persisted
|
|
8529
|
+
# record to reflect the final outcome. Targeted field update
|
|
8530
|
+
# (not a re-move of the write) keeps every other reader of
|
|
8531
|
+
# aggregate.json undisturbed.
|
|
8532
|
+
export LOKI_DA_AGG_FILE="$review_dir/$review_id/aggregate.json"
|
|
8533
|
+
python3 << 'DA_AGG_PATCH' || true
|
|
8534
|
+
import json, os
|
|
8535
|
+
agg_file = os.environ["LOKI_DA_AGG_FILE"]
|
|
8536
|
+
try:
|
|
8537
|
+
with open(agg_file) as f:
|
|
8538
|
+
data = json.load(f)
|
|
8539
|
+
data["has_blocking"] = True
|
|
8540
|
+
with open(agg_file, "w") as f:
|
|
8541
|
+
json.dump(data, f, indent=2)
|
|
8542
|
+
except (OSError, ValueError):
|
|
8543
|
+
pass
|
|
8544
|
+
DA_AGG_PATCH
|
|
8545
|
+
unset LOKI_DA_AGG_FILE
|
|
8546
|
+
log_error "DEVIL'S ADVOCATE: found Critical/High issue the unanimous council missed -- BLOCK"
|
|
8547
|
+
{
|
|
8548
|
+
echo "DEVILS_ADVOCATE_BLOCK: Critical/High found after unanimous PASS"
|
|
8549
|
+
grep -iE "\[(Critical|High)\]" "$da_output" || true
|
|
8550
|
+
} >> "$review_dir/$review_id/anti-sycophancy.txt"
|
|
8551
|
+
else
|
|
8552
|
+
log_info "Devil's Advocate: no additional Critical/High issues found"
|
|
8553
|
+
echo "DEVILS_ADVOCATE_PASS: no Critical/High beyond unanimous council" \
|
|
8554
|
+
>> "$review_dir/$review_id/anti-sycophancy.txt"
|
|
8555
|
+
fi
|
|
8556
|
+
else
|
|
8557
|
+
log_warn "Devil's Advocate: no usable output (treating as no finding)"
|
|
8558
|
+
echo "DEVILS_ADVOCATE_NO_OUTPUT: reviewer produced no usable reply" \
|
|
8559
|
+
>> "$review_dir/$review_id/anti-sycophancy.txt"
|
|
8560
|
+
fi
|
|
8561
|
+
fi
|
|
8323
8562
|
fi
|
|
8324
8563
|
|
|
8325
8564
|
# Blocking decision
|
|
@@ -13963,14 +14202,14 @@ if __name__ == "__main__":
|
|
|
13963
14202
|
fi
|
|
13964
14203
|
# Test coverage gate
|
|
13965
14204
|
if [ "${PHASE_UNIT_TESTS:-true}" = "true" ]; then
|
|
13966
|
-
log_info "Quality gate: test
|
|
14205
|
+
log_info "Quality gate: test suite (pass/fail)..."
|
|
13967
14206
|
if enforce_test_coverage; then
|
|
13968
14207
|
clear_gate_failure "test_coverage"
|
|
13969
14208
|
else
|
|
13970
14209
|
local tc_count
|
|
13971
14210
|
tc_count=$(track_gate_failure "test_coverage")
|
|
13972
14211
|
gate_failures="${gate_failures}test_coverage,"
|
|
13973
|
-
log_warn "Test
|
|
14212
|
+
log_warn "Test suite gate FAILED ($tc_count consecutive) - must pass next iteration"
|
|
13974
14213
|
fi
|
|
13975
14214
|
fi
|
|
13976
14215
|
# BUG-ST-002: Check pause signal between quality gates (after test coverage)
|
|
@@ -13981,6 +14220,30 @@ if __name__ == "__main__":
|
|
|
13981
14220
|
fi
|
|
13982
14221
|
continue
|
|
13983
14222
|
fi
|
|
14223
|
+
# Mock integrity gate (P0-3): block on CRITICAL/HIGH mock problems.
|
|
14224
|
+
if [ "${LOKI_GATE_MOCK:-true}" = "true" ] && [ "$ITERATION_COUNT" -gt 0 ]; then
|
|
14225
|
+
log_info "Quality gate: mock integrity..."
|
|
14226
|
+
if enforce_mock_integrity; then
|
|
14227
|
+
clear_gate_failure "mock_integrity"
|
|
14228
|
+
else
|
|
14229
|
+
local mk_count
|
|
14230
|
+
mk_count=$(track_gate_failure "mock_integrity")
|
|
14231
|
+
gate_failures="${gate_failures}mock_integrity,"
|
|
14232
|
+
log_warn "Mock integrity gate FAILED ($mk_count consecutive) - CRITICAL/HIGH mock problems"
|
|
14233
|
+
fi
|
|
14234
|
+
fi
|
|
14235
|
+
# Test mutation integrity gate (P0-3): block on HIGH test-fitting.
|
|
14236
|
+
if [ "${LOKI_GATE_MUTATION:-true}" = "true" ] && [ "$ITERATION_COUNT" -gt 0 ]; then
|
|
14237
|
+
log_info "Quality gate: test mutation integrity..."
|
|
14238
|
+
if enforce_mutation_integrity; then
|
|
14239
|
+
clear_gate_failure "mutation_integrity"
|
|
14240
|
+
else
|
|
14241
|
+
local mt_count
|
|
14242
|
+
mt_count=$(track_gate_failure "mutation_integrity")
|
|
14243
|
+
gate_failures="${gate_failures}mutation_integrity,"
|
|
14244
|
+
log_warn "Mutation integrity gate FAILED ($mt_count consecutive) - HIGH test-fitting detected"
|
|
14245
|
+
fi
|
|
14246
|
+
fi
|
|
13984
14247
|
# Code review gate (upgraded from advisory, with escalation)
|
|
13985
14248
|
if [ "$PHASE_CODE_REVIEW" = "true" ] && [ "$ITERATION_COUNT" -gt 0 ]; then
|
|
13986
14249
|
log_info "Quality gate: code review..."
|
|
@@ -14052,7 +14315,7 @@ if __name__ == "__main__":
|
|
|
14052
14315
|
if [ "$ITERATION_COUNT" -gt 0 ]; then
|
|
14053
14316
|
run_doc_staleness_check
|
|
14054
14317
|
fi
|
|
14055
|
-
# Documentation quality gate - Gate
|
|
14318
|
+
# Documentation quality gate - Gate 7 (Documentation Coverage)
|
|
14056
14319
|
if [ "${LOKI_GATE_DOC_COVERAGE:-true}" = "true" ] && [ "$ITERATION_COUNT" -gt 0 ]; then
|
|
14057
14320
|
log_info "Quality gate: documentation coverage..."
|
|
14058
14321
|
if run_doc_quality_gate; then
|