npm - loki-mode - Versions diffs - 7.51.0 → 7.53.0 - Mend

loki-mode 7.51.0 → 7.53.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/SKILL.md +2 -2
package/VERSION +1 -1
package/autonomy/completion-council.sh +2 -2
package/autonomy/grill.sh +1 -1
package/autonomy/lib/claude-flags.sh +15 -11
package/autonomy/lib/wiki_llm.py +1 -1
package/autonomy/loki +10 -10
package/autonomy/prd-checklist.sh +18 -8
package/autonomy/run.sh +121 -7
package/dashboard/__init__.py +1 -1
package/dashboard/server.py +115 -0
package/docs/INSTALLATION.md +2 -2
package/loki-ts/dist/loki.js +2 -2
package/magic/core/debate.py +4 -2
package/magic/core/generator.py +1 -1
package/mcp/__init__.py +1 -1
package/package.json +1 -1
package/plugins/loki-mode/.claude-plugin/plugin.json +1 -1
package/providers/codex.sh +16 -11
package/references/multi-provider.md +1 -1
package/skills/model-selection.md +7 -4
package/skills/providers.md +2 -2
package/skills/quality-gates.md +8 -5
package/src/audit/index.js +84 -0

package/SKILL.md CHANGED Viewed

@@ -3,7 +3,7 @@ name: loki-mode
 description: Autonomous spec-driven build system with a built-in trust layer. It does not call work done until it is verified (RARV-C closure loop, 8 quality gates, completion council, verified-completion evidence gate). Triggers on "Loki Mode". Takes a spec (PRD, GitHub issue, OpenAPI doc, etc.) to deployed product with minimal human intervention. Provider-agnostic. Requires --dangerously-skip-permissions flag.
 ---
-# Loki Mode v7.51.0
+# Loki Mode v7.53.0
 **You are an autonomous agent. You make decisions. You do not ask questions. You do not stop.**
@@ -407,4 +407,4 @@ See `CHANGELOG.md` entries [7.5.7], [7.5.8], [7.5.13] for the per-fix list and r
 ---
-**v7.51.0 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
+**v7.53.0 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**

package/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 7.51.0
1	+ 7.53.0

package/autonomy/completion-council.sh CHANGED Viewed

@@ -2038,7 +2038,7 @@ ISSUES: CRITICAL:description (optional, one per line per issue)"
             ;;
         codex)
             if command -v codex &>/dev/null; then
-                verdict=$(codex exec --full-auto "$prompt" 2>/dev/null)
+                verdict=$(codex exec --sandbox workspace-write "$prompt" 2>/dev/null)
             fi
             ;;
         gemini)
@@ -2139,7 +2139,7 @@ REASON: your reasoning"
             ;;
         codex)
             if command -v codex &>/dev/null; then
-                verdict=$(codex exec --full-auto "$prompt" 2>/dev/null)
+                verdict=$(codex exec --sandbox workspace-write "$prompt" 2>/dev/null)
             fi
             ;;
         gemini)

package/autonomy/grill.sh CHANGED Viewed

@@ -227,7 +227,7 @@ grill_invoke_provider() {
                 return $GRILL_EXIT_ERROR
             fi
             local out
-            out="$(printf '%s' "$prompt" | _grill_with_timeout "${LOKI_GRILL_TIMEOUT:-180}" codex exec --full-auto - 2>/dev/null)"
+            out="$(printf '%s' "$prompt" | _grill_with_timeout "${LOKI_GRILL_TIMEOUT:-180}" codex exec --sandbox workspace-write - 2>/dev/null)"
             if [ -z "$out" ]; then
                 _grill_err "provider returned no output"
                 return $GRILL_EXIT_ERROR

package/autonomy/lib/claude-flags.sh CHANGED Viewed

@@ -63,31 +63,35 @@ loki_remaining_budget() {
     local budget_file="${TARGET_DIR:-.}/.loki/metrics/budget.json"
     local spend="0"
     if [ -f "$budget_file" ]; then
-        spend=$(python3 -c "
-import json, sys
+        # Pass the path via env var (os.environ), NOT string interpolation, so a
+        # path containing a single quote (or other python/shell-breaking char)
+        # cannot break the parse. Single-quoted program -> bash interpolates nothing.
+        spend=$(_LOKI_BUDGET_FILE="$budget_file" python3 -c '
+import json, os, sys
 try:
-    with open('$budget_file') as f:
+    with open(os.environ["_LOKI_BUDGET_FILE"]) as f:
         d = json.load(f)
-    v = d.get('current_spend', 0)
+    v = d.get("current_spend", 0)
     print(float(v))
 except Exception:
     print(0)
-" 2>/dev/null)
+' 2>/dev/null)
     fi
     # Compute remaining via python3 (bash floats are unreliable across awk/bc variations).
-    python3 -c "
-import sys
+    # Pass limit/spend via env vars too (same hardening; single-quoted program).
+    _LOKI_BUDGET_LIMIT="$limit" _LOKI_BUDGET_SPEND="$spend" python3 -c '
+import os, sys
 try:
-    limit = float('$limit')
-    spend = float('$spend')
+    limit = float(os.environ["_LOKI_BUDGET_LIMIT"])
+    spend = float(os.environ["_LOKI_BUDGET_SPEND"])
     rem = limit - spend
     # Strictly positive; otherwise emit nothing (caller decides whether to bail or warn).
     if rem > 0:
         # Round to 2 decimal places for the CLI.
-        print(f'{rem:.2f}')
+        print(f"{rem:.2f}")
 except Exception:
     pass
-" 2>/dev/null
+' 2>/dev/null
 }
 # ---------- Fallback model ----------

package/autonomy/lib/wiki_llm.py CHANGED Viewed

@@ -57,7 +57,7 @@ def invoke_llm(prompt, timeout=120):
     cmds = {
         "claude": ["claude", "-p", prompt],
-        "codex": ["codex", "exec", "--full-auto", prompt],
+        "codex": ["codex", "exec", "--sandbox", "workspace-write", prompt],
         "cline": ["cline", "-y", prompt],
         "aider": ["aider", "--message", prompt, "--yes-always", "--no-auto-commits"],
     }

package/autonomy/loki CHANGED Viewed

@@ -743,7 +743,7 @@ show_help() {
     echo "  --complex        Force complex complexity tier (8 phases)"
     echo "  --github         Enable GitHub issue import"
     echo "  --no-dashboard   Disable web dashboard"
-    echo "  --sandbox        Run in Docker sandbox for isolation"
+    echo "  --sandbox        Run in Docker sandbox for isolation (default: off; requires Docker)"
     echo "  --skip-memory    Skip loading memory context at startup"
     echo "  --fresh-prd      Regenerate the PRD from the codebase (no-PRD runs; ignores the reusable generated PRD)"
     echo "  --compliance PRESET  Enable compliance mode (default|healthcare|fintech|government)"
@@ -1063,7 +1063,7 @@ cmd_start() {
                 echo "  --github              Enable GitHub issue import"
                 echo "  --no-dashboard        Disable web dashboard"
                 echo "  --api                 Start dashboard API server alongside the build"
-                echo "  --sandbox             Run in Docker sandbox"
+                echo "  --sandbox             Run in Docker sandbox (default: off; requires Docker)"
                 echo "  --skip-memory         Skip loading memory context at startup"
                 echo "  --fresh-prd           Regenerate the PRD from the codebase on a no-PRD run"
                 echo "                        (ignores the reusable generated PRD; aliases: --regen-prd,"
@@ -3785,7 +3785,7 @@ cmd_provider_info() {
             echo "Name:        Codex CLI"
             echo "Vendor:      OpenAI"
             echo "CLI:         codex"
-            echo "Flag:        --full-auto"
+            echo "Flag:        --sandbox workspace-write"
             echo ""
             echo "Features:"
             echo "  - Autonomous mode"
@@ -5680,7 +5680,7 @@ cmd_run() {
                 echo "  --simple           Force simple complexity tier"
                 echo "  --complex          Force complex complexity tier"
                 echo "  --no-dashboard     Disable web dashboard"
-                echo "  --sandbox          Run in Docker sandbox"
+                echo "  --sandbox          Run in Docker sandbox (default: off; requires Docker)"
                 echo "  --no-plan          Skip auto-shown PRD analysis at startup"
                 echo "  --budget USD       Set cost budget limit"
                 echo ""
@@ -11641,7 +11641,7 @@ except Exception: pass
                         done; } && phase_exit=0 || phase_exit=$?
                     ;;
                 codex)
-                    (cd "$codebase_path" && codex exec --full-auto "$phase_prompt" 2>&1) || phase_exit=$?
+                    (cd "$codebase_path" && codex exec --sandbox workspace-write "$phase_prompt" 2>&1) || phase_exit=$?
                     ;;
                 cline)
                     (cd "$codebase_path" && cline -y "$phase_prompt" 2>&1) || phase_exit=$?
@@ -11814,7 +11814,7 @@ except Exception: pass
                     done; } && doc_exit=0 || doc_exit=$?
                 ;;
             codex)
-                (cd "$codebase_path" && codex exec --full-auto "$doc_prompt" 2>&1) || doc_exit=$?
+                (cd "$codebase_path" && codex exec --sandbox workspace-write "$doc_prompt" 2>&1) || doc_exit=$?
                 ;;
             cline)
                 (cd "$codebase_path" && cline -y "$doc_prompt" 2>&1) || doc_exit=$?
@@ -12445,7 +12445,7 @@ except Exception: pass
                 done && heal_exit=0 || heal_exit=$?
             ;;
         codex)
-            (cd "$codebase_path" && codex exec --full-auto "$heal_prompt" 2>&1) || heal_exit=$?
+            (cd "$codebase_path" && codex exec --sandbox workspace-write "$heal_prompt" 2>&1) || heal_exit=$?
             ;;
         cline)
             (cd "$codebase_path" && cline -y "$heal_prompt" 2>&1) || heal_exit=$?
@@ -22069,7 +22069,7 @@ USER TASK: ${prompt}"
                     claude -p "$full_prompt" 2>&1 || agent_exit=$?
                     ;;
                 codex)
-                    codex exec --full-auto "$full_prompt" 2>&1 || agent_exit=$?
+                    codex exec --sandbox workspace-write "$full_prompt" 2>&1 || agent_exit=$?
                     ;;
                 cline)
                     cline -y "$full_prompt" 2>&1 || agent_exit=$?
@@ -22200,7 +22200,7 @@ $diff"
             case "$provider" in
                 claude)  claude -p "$review_prompt" 2>&1 ;;
-                codex)   codex exec --full-auto "$review_prompt" 2>&1 ;;
+                codex)   codex exec --sandbox workspace-write "$review_prompt" 2>&1 ;;
                 cline)   cline -y "$review_prompt" 2>&1 ;;
                 *)       echo -e "${RED}Unknown provider: $provider${NC}"; return 1 ;;
             esac
@@ -23870,7 +23870,7 @@ _docs_invoke_provider() {
             result=$($t_prefix env CAVEMAN_DEFAULT_MODE=off claude -p "$prompt" 2>/dev/null) || exit_code=$?
             ;;
         codex)
-            result=$($t_prefix codex exec --full-auto "$prompt" 2>/dev/null) || exit_code=$?
+            result=$($t_prefix codex exec --sandbox workspace-write "$prompt" 2>/dev/null) || exit_code=$?
             ;;
         cline)
             result=$($t_prefix cline -y "$prompt" 2>/dev/null) || exit_code=$?

package/autonomy/prd-checklist.sh CHANGED Viewed

@@ -422,7 +422,11 @@ checklist_should_verify() {
 # non-cooperative agent with filesystem tools can read the reservation directly.
 #
 # Selection is idempotent and reproducible: count = clamp(round(0.25*N), 1, 5)
-# for N>=4 items; ordering by sha256 of each item's "id" (stable, not random).
+# for N>=2 items; ordering by sha256 of each item's "id" (stable, not random).
+# Small checklists (2 <= N < 4) reserve exactly 1 held-out item via the same
+# sha256-rank selection (the clamp floor of 1 guarantees coverage), so a small
+# spec's checklist is never fully gameable. N<2 is a no-op: holding out the only
+# item of a 1-item checklist would leave nothing to verify against in the loop.
 # Written once to .loki/checklist/held-out.json; never overwritten if present.
 checklist_select_heldout() {
     local heldout_file="${CHECKLIST_DIR:-".loki/checklist"}/held-out.json"
@@ -442,7 +446,7 @@ checklist_select_heldout() {
     #   PARTIAL kept=k dropped=d - some prior ids survived; we keep only survivors
     #   DUP_SKIP          - current checklist ids are not unique; the id-based
     #                       mechanism is unsound, so we reserve nothing (MEDIUM-2)
-    #   NOOP              - n<4 with no prior file, or other no-write outcome
+    #   NOOP              - n<2 with no prior file, or other no-write outcome
     # Honest caveat: re-selection or partial-survival after a regen can reserve
     # items the build loop already saw in earlier prompts (the hidden-from-loop
     # guarantee is best-effort once the checklist ids change mid-run).
@@ -512,7 +516,7 @@ if os.path.exists(out_path):
 if prior is not None:
     prior_ids = [i for i in prior.get('held_out', []) if i]
-    # A prior reservation of [] (e.g. an earlier n<4 run) is a valid no-op state;
+    # A prior reservation of [] (e.g. an earlier n<2 run) is a valid no-op state;
     # keep it idempotent rather than re-selecting now that n may have grown.
     if not prior_ids:
         print('IDEMPOTENT')
@@ -525,9 +529,11 @@ if prior is not None:
     if not survivors:
         # Fully stale: the checklist regenerated and orphaned the reservation.
         # Deterministically re-select from the CURRENT checklist.
-        if n < 4:
+        if n < 2:
+            # N<2: cannot hold out from a 1-item checklist (reserving the only
+            # item leaves nothing to verify against). No-op write of an empty set.
             atomic_write({'held_out': [], 'total_items': n,
-                          'note': 'n<4: no held-out reserved (re-selected after stale reservation)'})
+                          'note': 'n<2: no held-out reserved (re-selected after stale reservation)'})
             print('RESELECTED 0')
             sys.exit(0)
         held = fresh_selection()
@@ -542,11 +548,15 @@ if prior is not None:
     sys.exit(0)
 # No prior reservation: first selection.
-if n < 4:
-    # N>=4 gate: smaller checklists get no held-out (nothing to hide reliably).
-    atomic_write({'held_out': [], 'total_items': n, 'note': 'n<4: no held-out reserved'})
+if n < 2:
+    # N<2 gate: a 1-item (or empty) checklist cannot meaningfully hold out an
+    # item -- reserving the only item would leave nothing to verify against in
+    # the build loop. Write an empty set so downstream reads stay well-formed.
+    atomic_write({'held_out': [], 'total_items': n, 'note': 'n<2: no held-out reserved'})
     print('NOOP')
     sys.exit(0)
+# For 2 <= N < 4, fresh_selection() reserves exactly 1 item (select_count clamps
+# round(0.25*N) up to a floor of 1), so small specs are never fully gameable.
 held = fresh_selection()
 atomic_write({'held_out': held, 'total_items': n})

package/autonomy/run.sh CHANGED Viewed

@@ -585,7 +585,7 @@ BACKGROUND_MODE=${LOKI_BACKGROUND:-false}                # Run in background
 STAGED_AUTONOMY=${LOKI_STAGED_AUTONOMY:-false}           # Require plan approval
 AUDIT_LOG_ENABLED=${LOKI_AUDIT_LOG:-true}                # Enable audit logging (on by default)
 MAX_PARALLEL_AGENTS=${LOKI_MAX_PARALLEL_AGENTS:-10}      # Limit concurrent agents
-SANDBOX_MODE=${LOKI_SANDBOX_MODE:-false}                 # Docker sandbox mode
+SANDBOX_MODE=${LOKI_SANDBOX_MODE:-false}                 # Docker sandbox mode (informational; the real dispatch reads LOKI_SANDBOX_MODE at autonomy/loki:1965 and execs sandbox.sh -- this var is not consumed in run.sh)
 ALLOWED_PATHS=${LOKI_ALLOWED_PATHS:-""}                  # Empty = all paths allowed
 BLOCKED_COMMANDS=${LOKI_BLOCKED_COMMANDS:-"rm -rf /,dd if=,mkfs,:(){ :|:& };:"}
@@ -3264,7 +3264,7 @@ spawn_worktree_session() {
                 fi
                 ;;
             codex)
-                codex exec --full-auto --skip-git-repo-check \
+                codex exec --sandbox workspace-write --skip-git-repo-check \
                     "Loki Mode: $task_prompt. Read .loki/CONTINUITY.md for context." \
                     >> "$log_file" 2>&1 || _wt_exit=$?
                 ;;
@@ -3480,7 +3480,7 @@ Output ONLY the resolved file content with no conflict markers. No explanations.
                 resolution=$(CAVEMAN_DEFAULT_MODE=off claude "${_cr_argv[@]}" -p "$conflict_prompt" --output-format text 2>/dev/null)
                 ;;
             codex)
-                resolution=$(codex exec --full-auto --skip-git-repo-check "$conflict_prompt" 2>/dev/null)
+                resolution=$(codex exec --sandbox workspace-write --skip-git-repo-check "$conflict_prompt" 2>/dev/null)
                 ;;
             cline)
                 resolution=$(invoke_cline_capture "$conflict_prompt" 2>/dev/null)
@@ -6199,7 +6199,7 @@ check_command_allowed() {
     # run.sh does not directly execute arbitrary shell commands from user or agent
     # input. Command execution is handled by the AI CLI's own permission model:
     #   - Claude Code: --dangerously-skip-permissions (with its own allowlist)
-    #   - Codex CLI: --full-auto or exec --dangerously-bypass-approvals-and-sandbox
+    #   - Codex CLI: exec --sandbox workspace-write or exec --dangerously-bypass-approvals-and-sandbox
     #
     # HUMAN_INPUT.md content is injected as a text prompt to the AI agent (not
     # executed as a shell command), and is already guarded by:
@@ -8313,6 +8313,89 @@ enforce_mutation_integrity() {
     return 0
 }
+# ============================================================================
+# Semantic Test-Authenticity Gate (P1-3): wire tests/detect-semantic-test-problems.sh
+# as an OPT-IN completion gate. The detector catches the harder class of fake
+# tests that the regex detectors (gates 5+6) miss: assertions that look real but
+# verify nothing because the asserted value never flows through code under test
+# (literal-via-variable echo HIGH, mock-return echo MED, deleted assertions MED).
+#
+# ADVISORY-FIRST POSTURE (no-deadlock contract): this helper is invoked ONLY when
+# LOKI_GATE_SEMANTIC_TESTS=true (the elif guard at the completion-promise arm
+# short-circuits when off, so there is zero runtime cost on the default path).
+# When on, it runs the detector with --block-high (clean exit-code contract:
+# rc 2 iff a CRITICAL/HIGH finding exists). We surface ALL severities to a
+# findings file (advisory) and return nonzero ONLY on rc 2. Every other exit --
+# rc 0 (clean), rc 124 (timeout), detector absent, no test files, malformed
+# output -- returns 0 (pass/fall-through), so the autonomous loop can NEVER
+# deadlock on a clean run. Mirrors enforce_mock_integrity's invocation
+# (cd TARGET_DIR + LOKI_SCAN_DIR=TARGET_DIR + timeout), swapping --strict for
+# --block-high and deciding on the rc-2 contract instead of grepping stdout.
+# ============================================================================
+enforce_semantic_integrity() {
+    # Opt-in semantic test-authenticity gate. Runs the detector with
+    # --block-high under a timeout and maps the outcome to the gate contract:
+    #   returns 1 -- detector exited 2 AND printed a [CRITICAL]/[HIGH] finding
+    #   returns 0 -- every other outcome (clean, timeout, detector absent,
+    #                detector crash, malformed output), so the loop cannot
+    #                deadlock on a run that produced no blocking finding.
+    # Reads:  TARGET_DIR, SCRIPT_DIR, LOKI_GATE_TIMEOUT (default 300 seconds).
+    # Writes: <TARGET_DIR>/.loki/quality/semantic-findings.txt -- present only
+    #         when tagged findings were captured, removed otherwise.
+    local loki_dir="${TARGET_DIR:-.}/.loki"
+    local quality_dir="$loki_dir/quality"
+    mkdir -p "$quality_dir"
+    local findings_file="$quality_dir/semantic-findings.txt"
+    local detector="$SCRIPT_DIR/../tests/detect-semantic-test-problems.sh"
+    local gate_timeout="${LOKI_GATE_TIMEOUT:-300}"
+    if [ ! -f "$detector" ]; then
+        log_info "Semantic test gate: detector not found, skipping (inconclusive)"
+        rm -f "$findings_file" 2>/dev/null || true
+        return 0
+    fi
+    # Declared separately from the assignment so $? reflects the detector,
+    # not the `local` builtin.
+    local output rc
+    # --block-high exits 2 iff CRITICAL/HIGH present; 0 otherwise (clean wrapper).
+    output=$(cd "${TARGET_DIR:-.}" && LOKI_SCAN_DIR="${TARGET_DIR:-.}" \
+        timeout "$gate_timeout" bash "$detector" --block-high 2>&1)
+    rc=$?
+    # timeout exit 124 -- inconclusive, never block on a hang (deny-filter)
+    if [ "$rc" -eq 124 ]; then
+        log_warn "Semantic test gate: detector timed out after ${gate_timeout}s -- inconclusive"
+        rm -f "$findings_file" 2>/dev/null || true
+        return 0
+    fi
+    if [ "$rc" -eq 2 ]; then
+        # Exit status 2 alone is not proof of a finding: bash itself exits 2
+        # when the detector script has a syntax error, and 2 is a common
+        # usage-error code. Require a tagged CRITICAL/HIGH line before
+        # blocking so a broken detector cannot reject every completion claim.
+        if printf '%s\n' "$output" | grep -qE '\[(CRITICAL|HIGH)\]'; then
+            # Persist per-finding text (all severities) for the next iteration.
+            {
+                echo "# Semantic test-authenticity findings (CRITICAL/HIGH block this completion)"
+                printf '%s\n' "$output" | grep -E '\[(CRITICAL|HIGH|MEDIUM|LOW)\]' || true
+            } > "$findings_file"
+            log_warn "Semantic test gate: CRITICAL/HIGH fake-test problems detected -- BLOCK"
+            return 1
+        fi
+        log_warn "Semantic test gate: detector exited 2 without a CRITICAL/HIGH finding -- inconclusive"
+        rm -f "$findings_file" 2>/dev/null || true
+        return 0
+    fi
+    # rc 0 (and any other non-2, non-124 code, e.g. a malformed run) -> PASS.
+    # Route any MED/LOW advisory findings to the injection file, else clear it.
+    local med_low
+    med_low=$(printf '%s\n' "$output" | grep -E '\[(MEDIUM|LOW)\]' || true)
+    if [ -n "$med_low" ]; then
+        {
+            echo "# Semantic test advisory findings (MED/LOW, non-blocking)"
+            printf '%s\n' "$med_low"
+        } > "$findings_file"
+    else
+        rm -f "$findings_file" 2>/dev/null || true
+    fi
+    log_info "Semantic test gate: PASS"
+    return 0
+}
+# P1-3 wrapper that runs the semantic gate and returns its exact rc, mirroring
+# _evidence_gate_and_surface so the completion-promise elif arm reads cleanly
+# (`! _semantic_gate_and_surface`). Returns nonzero ONLY when enforce_semantic_integrity
+# saw an rc-2 (CRITICAL/HIGH) result; all deny-filter cases already collapse to 0
+# inside enforce_semantic_integrity, so this never blocks a clean run.
+_semantic_gate_and_surface() {
+    local _rc=0
+    enforce_semantic_integrity || _rc=$?
+    return "$_rc"
+}
 # ============================================================================
 # 3-Reviewer Parallel Code Review (v5.35.0)
 # Specialist pool from skills/quality-gates.md with blind review
@@ -8637,7 +8720,7 @@ _dispatch_reviewer() {
                 --output-format text > "$review_output" 2>/dev/null
             ;;
         codex)
-            codex exec --full-auto --skip-git-repo-check "$prompt_text" \
+            codex exec --sandbox workspace-write --skip-git-repo-check "$prompt_text" \
                 > "$review_output" 2>/dev/null
             ;;
         cline)
@@ -9361,7 +9444,7 @@ ADVERSARIAL_EOF
             ;;
         codex)
             if command -v codex &>/dev/null; then
-                codex exec --full-auto --skip-git-repo-check "$adversarial_prompt" \
+                codex exec --sandbox workspace-write --skip-git-repo-check "$adversarial_prompt" \
                     > "$result_file" 2>/dev/null || true
             fi
             ;;
@@ -12248,6 +12331,23 @@ if d.get('blocked'):
         gate_failure_context="${gate_failure_context}FIX THESE ISSUES BEFORE PROCEEDING WITH NEW WORK."
     fi
+    # P1-3: surface specific semantic test-authenticity findings (which fake test,
+    # which line) when the opt-in gate (LOKI_GATE_SEMANTIC_TESTS) wrote them, so a
+    # block converges: the agent gets the exact files/lines to fix rather than a
+    # bare gate name. The file exists only when the gate ran AND found something
+    # (cleared on clean), so this is zero-cost on the default path and when off.
+    # Mirrors the static-analysis/test-results detail-surfacing above. Surfaced
+    # whether the run blocked (CRIT/HIGH) or only advised (MED/LOW): both inform
+    # the next iteration. Independent of gate-failures.txt presence (the
+    # completion-promise arm does not append a gate token).
+    if [ -f "${TARGET_DIR:-.}/.loki/quality/semantic-findings.txt" ]; then
+        local sem_findings
+        sem_findings=$(grep -E '\[(CRITICAL|HIGH|MEDIUM|LOW)\]' "${TARGET_DIR:-.}/.loki/quality/semantic-findings.txt" 2>/dev/null | head -20 || true)
+        if [ -n "$sem_findings" ]; then
+            gate_failure_context="${gate_failure_context} SEMANTIC TEST-AUTHENTICITY FINDINGS (fix the fake tests; an assertion must verify a value that flows through the code under test, not echo a literal back): ${sem_findings}"
+        fi
+    fi
     # P2-2: high-severity spec-assumption context. When DISCOVERY recorded any
     # high-severity assumption (the spec was ambiguous in a high-impact place),
     # surface it to the build agent so it implements with the gap in view (or
@@ -14717,7 +14817,7 @@ if __name__ == "__main__":
                 # Uses dynamic tier from RARV phase (tier_param already set above)
                 { LOKI_CODEX_REASONING_EFFORT="$tier_param" \
                 CODEX_MODEL_REASONING_EFFORT="$tier_param" \
-                codex exec --full-auto --skip-git-repo-check \
+                codex exec --sandbox workspace-write --skip-git-repo-check \
                     "$prompt" 2>&1 | tee -a "$log_file" "$agent_log" "$iter_output"; \
                 } && exit_code=0 || exit_code=$?
                 ;;
@@ -15347,6 +15447,20 @@ else:
                 log_warn "Completion claim rejected: assumption ledger gate found unresolved high-severity spec assumption(s)."
                 log_warn "  Details under .loki/council/assumption-block.json ; opt out with LOKI_ASSUMPTION_GATE=0"
                 # Fall through; keep iterating until high-sev assumptions resolve.
+            # P1-3: semantic test-authenticity gate (OPT-IN, default OFF). Catches
+            # fake tests that look real but verify nothing (literal-via-variable
+            # echo etc.) that the regex gates 5+6 miss. ADVISORY-FIRST: the arm is
+            # guarded by LOKI_GATE_SEMANTIC_TESTS=true, so by default it never runs
+            # (zero runtime cost, never blocks). When enabled it runs the detector
+            # with --block-high and rejects the completion ONLY on a CRITICAL/HIGH
+            # finding; clean / no-test-files / detector-absent / timeout / malformed
+            # all collapse to a pass inside _semantic_gate_and_surface, so the
+            # autonomous loop can never deadlock on a clean run. Mirrors the
+            # evidence / held-out / assumption arms above.
+            elif [ "$_completion_claimed" = 1 ] && [ "${LOKI_GATE_SEMANTIC_TESTS:-false}" = "true" ] && type _semantic_gate_and_surface &>/dev/null && ! _semantic_gate_and_surface; then
+                log_warn "Completion claim rejected: semantic test-authenticity gate found CRITICAL/HIGH fake-test problem(s)."
+                log_warn "  Details under .loki/quality/semantic-findings.txt ; opt-in gate -- disable with LOKI_GATE_SEMANTIC_TESTS=false"
+                # Fall through; keep iterating until the fake tests are fixed.
             elif [ "$_completion_claimed" = 1 ]; then
                 echo ""
                 if [ -n "$COMPLETION_PROMISE" ]; then

package/dashboard/__init__.py CHANGED Viewed

@@ -7,7 +7,7 @@ Modules:
     control: Session control API (start/stop/pause/resume)
 """
-__version__ = "7.51.0"
+__version__ = "7.53.0"
 # Expose the control app for easy import
 try:

package/dashboard/server.py CHANGED Viewed

@@ -3248,6 +3248,121 @@ async def get_audit_summary(days: int = 7):
     return audit.get_audit_summary(days=days)
+# Continuous compliance surface (P3-11).
+#
+# Exposes the agent audit chain's compliance posture as an always-available
+# live endpoint. There is NO background scheduler in this surface (that is
+# infra, out of scope): the report is regenerated from the CURRENT audit
+# state on every request, so the endpoint is "continuous" in the sense that
+# it always reflects live state -- never a stale cached snapshot.
+#
+# The report is produced by the authoritative Node compliance engine
+# (src/audit/index.js, the single source of truth for SOC2/ISO/GDPR control
+# mappings) via its `report` CLI shim, so the Python surface never
+# reimplements (and never drifts from) the mapping logic. The chain it reads
+# is the JS AGENT chain at <project>/.loki/audit/audit.jsonl -- a different
+# chain from the Python dashboard chain that /api/enterprise/audit serves
+# (the two are reconciled by the cross-link verifier, not merged), so this
+# endpoint deliberately does NOT gate on audit.is_audit_enabled() (that flag
+# governs the Python chain). When the agent chain has no entries the report
+# is returned honestly with totalAuditEntries == 0; no fabricated pass.
+_COMPLIANCE_TYPES = ("soc2", "iso27001", "gdpr")
+@app.get("/api/compliance", dependencies=[Depends(auth.require_scope("audit"))])
+def get_compliance_status(report_type: str = Query("soc2", alias="type")):
+    """Live compliance status for the active project's agent audit chain.
+    Auth/tenant scoping: requires the `audit` scope (same gate as the
+    /api/enterprise/audit family). The data is filesystem state scoped to
+    the active project via _get_loki_dir(), exactly like the other
+    .loki-backed read endpoints; there is no DB tenant_id on a JSONL file
+    to enforce against.
+    Query: ?type=soc2|iso27001|gdpr (default soc2).
+    Returns the compliance report JSON regenerated from CURRENT audit
+    state on every call. If no audit data has been recorded the report is
+    honestly empty (totalAuditEntries == 0), not a fabricated compliant
+    verdict. If the Node engine is unavailable, returns an honest
+    available:false payload (HTTP 200) rather than masquerading as "no
+    compliance".
+    """
+    if not _read_limiter.check("compliance"):
+        raise HTTPException(status_code=429, detail="Rate limit exceeded")
+    if report_type not in _COMPLIANCE_TYPES:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid type: {report_type}. Must be one of {list(_COMPLIANCE_TYPES)}",
+        )
+    import shutil
+    # The agent audit chain lives under <project>/.loki/audit; _get_loki_dir()
+    # returns the .loki dir, so the project root is its parent.
+    project_dir = str(_get_loki_dir().parent.resolve())
+    repo_root = _Path(__file__).resolve().parent.parent
+    index_js = repo_root / "src" / "audit" / "index.js"
+    node_bin = shutil.which("node")
+    if node_bin is None or not index_js.exists():
+        return {
+            "available": False,
+            "reason": (
+                "Node runtime not found"
+                if node_bin is None
+                else f"compliance engine not found at {index_js}"
+            ),
+            "reportType": report_type,
+            "projectDir": project_dir,
+            "report": None,
+        }
+    try:
+        proc = subprocess.run(
+            [node_bin, str(index_js), "report", report_type, project_dir],
+            capture_output=True,
+            text=True,
+            timeout=30,
+            check=False,
+        )
+    except (OSError, subprocess.SubprocessError) as exc:
+        return {
+            "available": False,
+            "reason": f"compliance engine invocation failed: {exc}",
+            "reportType": report_type,
+            "projectDir": project_dir,
+            "report": None,
+        }
+    if proc.returncode != 0:
+        return {
+            "available": False,
+            "reason": (proc.stderr or "compliance engine returned non-zero").strip()[:500],
+            "reportType": report_type,
+            "projectDir": project_dir,
+            "report": None,
+        }
+    try:
+        report = json.loads(proc.stdout.strip())
+    except json.JSONDecodeError:
+        return {
+            "available": False,
+            "reason": "compliance engine produced non-JSON output",
+            "reportType": report_type,
+            "projectDir": project_dir,
+            "report": None,
+        }
+    return {
+        "available": True,
+        "reportType": report_type,
+        "projectDir": project_dir,
+        "report": report,
+    }
 # =============================================================================
 # File-based Session Endpoints (reads from .loki/ flat files)
 # =============================================================================

package/docs/INSTALLATION.md CHANGED Viewed

@@ -2,7 +2,7 @@
 The flagship product of [Autonomi](https://www.autonomi.dev/). Loki Mode is a spec-driven autonomous builder with a built-in trust layer that takes any spec to a deployed product and verifies completion with evidence (quality gates plus a completion council), not just a "done" claim. Complete installation instructions for all platforms and use cases.
-**Version:** v7.51.0
+**Version:** v7.53.0
 ---
@@ -396,7 +396,7 @@ provider works inside the container. Provide auth with your Anthropic API key:
 # Run Loki Mode in Docker (Claude provider, API-key auth)
 docker run --rm -e ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \
   -v $(pwd):/workspace -w /workspace \
-  asklokesh/loki-mode:7.51.0 start ./my-spec.md
+  asklokesh/loki-mode:7.53.0 start ./my-spec.md
 ```
 ##### docker compose + .env (no host install)

package/loki-ts/dist/loki.js CHANGED Viewed

@@ -1,5 +1,5 @@
 // @bun
-var r6=Object.defineProperty;var t6=($)=>$;function i6($,Q){this[$]=t6.bind(null,Q)}var h=($,Q)=>{for(var Z in Q)r6($,Z,{get:Q[Z],enumerable:!0,configurable:!0,set:i6.bind(Q,Z)})};var L=($,Q)=>()=>($&&(Q=$($=0)),Q);var K$=import.meta.require;var D1={};h(D1,{lokiDir:()=>P,homeLokiDir:()=>n$,findRepoRootForVersion:()=>o$,REPO_ROOT:()=>g});import{resolve as n,dirname as d$}from"path";import{fileURLToPath as e6}from"url";import{existsSync as P$}from"fs";import{homedir as $Q}from"os";function QQ(){let $=S1;for(let Q=0;Q<6;Q++){if(P$(n($,"VERSION"))&&P$(n($,"autonomy/run.sh")))return $;let Z=d$($);if(Z===$)break;$=Z}return n(S1,"..","..","..")}function o$($){let Q=$;for(let Z=0;Z<6;Z++){if(P$(n(Q,"VERSION"))&&P$(n(Q,"autonomy/run.sh")))return Q;let z=d$(Q);if(z===Q)break;Q=z}return n($,"..","..","..")}function P(){return process.env.LOKI_DIR??n(process.cwd(),".loki")}function n$(){return n($Q(),".loki")}var S1,g;var b=L(()=>{S1=d$(e6(import.meta.url));g=QQ()});import{readFileSync as ZQ}from"fs";import{resolve as zQ,dirname as XQ}from"path";import{fileURLToPath as KQ}from"url";function j$(){if($$!==null)return $$;let $="7.51.0";if(typeof $==="string"&&$.length>0)return $$=$,$$;try{let Q=XQ(KQ(import.meta.url)),Z=o$(Q);$$=ZQ(zQ(Z,"VERSION"),"utf-8").trim()}catch{$$="unknown"}return $$}var $$=null;var a$=L(()=>{b()});var b1={};h(b1,{runOrThrow:()=>qQ,run:()=>k,commandVersion:()=>WQ,commandExists:()=>f,ShellError:()=>s$});async function k($,Q={}){let Z=Bun.spawn({cmd:[...$],stdout:"pipe",stderr:"pipe",env:Q.env?{...process.env,...Q.env}:process.env,cwd:Q.cwd}),z,X;if(Q.timeoutMs&&Q.timeoutMs>0)z=setTimeout(()=>{try{Z.kill("SIGTERM")}catch{}X=setTimeout(()=>{try{Z.kill("SIGKILL")}catch{}},2000)},Q.timeoutMs);try{let[q,K,W]=await Promise.all([new Response(Z.stdout).text(),new Response(Z.stderr).text(),Z.exited]);return{stdout:q,stderr:K,exitCode:W}}finally{if(z)clearTimeout(z);if(X)clearTimeout(X)}}async function qQ($,Q={}){let Z=await k($,Q);if(Z.exitCode!==0)throw new 
s$(`command failed (${Z.exitCode}): ${$.join(" ")}`,Z.exitCode,Z.stdout,Z.stderr);return Z}async function f($){let Q=VQ($),Z=await k(["sh","-c",`command -v ${Q}`],{timeoutMs:5000});if(Z.exitCode===0)return Z.stdout.trim()||null;return null}function VQ($){if(!/^[A-Za-z0-9._/-]+$/.test($))throw Error(`refused to shell-escape suspect token: ${$}`);return $}async function WQ($,Q="--version"){if(!await f($))return null;let z=await k([$,Q],{timeoutMs:5000});if(z.exitCode!==0)return null;return((z.stdout||z.stderr).split(/\r?\n/)[0]?.trim()??"")||null}var s$;var d=L(()=>{s$=class s$ extends Error{message;exitCode;stdout;stderr;constructor($,Q,Z,z){super($);this.message=$;this.exitCode=Q;this.stdout=Z;this.stderr=z;this.name="ShellError"}}});function a($){return JQ?"":$}var JQ,T,S,_,wZ,I,R,y,V;var c=L(()=>{JQ=(process.env.NO_COLOR??"").length>0;T=a("\x1B[0;31m"),S=a("\x1B[0;32m"),_=a("\x1B[1;33m"),wZ=a("\x1B[0;34m"),I=a("\x1B[0;36m"),R=a("\x1B[1m"),y=a("\x1B[2m"),V=a("\x1B[0m")});import{existsSync as wQ}from"fs";async function Q$(){if(G$!==void 0)return G$;let $="/opt/homebrew/bin/python3.12";if(wQ($))return G$=$,$;let Q=await f("python3.12");if(Q)return G$=Q,Q;let Z=await f("python3");return G$=Z,Z}async function Z$($,Q={}){let Z=await Q$();if(!Z)return{stdout:"",stderr:"python3 not found",exitCode:127};return k([Z,"-c",$],Q)}var G$;var q$=L(()=>{d()});var e1={};h(e1,{runStatus:()=>uQ});import{existsSync as v,readFileSync as W$,readdirSync as d1,statSync as o1}from"fs";import{resolve as C,basename as DQ}from"path";import{homedir as CQ}from"os";function n1($){let Q=Math.trunc($);if(Q>=1e6)return`${(Math.trunc(Q/1e6*10)/10).toFixed(1)}M`;if(Q>=1000)return`${(Math.trunc(Q/1000*10)/10).toFixed(1)}K`;return String(Q)}function a1($,Q,Z){if(Q===0)return null;let z=Math.trunc($*100/Q),X=Math.trunc($*k$/Q);if(X>k$)X=k$;let q=k$-X,K=S;if(z>=80)K=T;else if(z>=50)K=_;let W="=".repeat(Math.max(0,X))+" ".repeat(Math.max(0,q)),J=n1($),U=n1(Q);return`  ${R}${Z}${V} ${K}[${W}]${V} ${z}% 
(${J} / ${U})`}async function hQ(){if(await f("jq"))return!0;return process.stdout.write(`${T}Error: jq is required but not installed.${V}
+var r6=Object.defineProperty;var t6=($)=>$;function i6($,Q){this[$]=t6.bind(null,Q)}var h=($,Q)=>{for(var Z in Q)r6($,Z,{get:Q[Z],enumerable:!0,configurable:!0,set:i6.bind(Q,Z)})};var L=($,Q)=>()=>($&&(Q=$($=0)),Q);var K$=import.meta.require;var D1={};h(D1,{lokiDir:()=>P,homeLokiDir:()=>n$,findRepoRootForVersion:()=>o$,REPO_ROOT:()=>g});import{resolve as n,dirname as d$}from"path";import{fileURLToPath as e6}from"url";import{existsSync as P$}from"fs";import{homedir as $Q}from"os";function QQ(){let $=S1;for(let Q=0;Q<6;Q++){if(P$(n($,"VERSION"))&&P$(n($,"autonomy/run.sh")))return $;let Z=d$($);if(Z===$)break;$=Z}return n(S1,"..","..","..")}function o$($){let Q=$;for(let Z=0;Z<6;Z++){if(P$(n(Q,"VERSION"))&&P$(n(Q,"autonomy/run.sh")))return Q;let z=d$(Q);if(z===Q)break;Q=z}return n($,"..","..","..")}function P(){return process.env.LOKI_DIR??n(process.cwd(),".loki")}function n$(){return n($Q(),".loki")}var S1,g;var b=L(()=>{S1=d$(e6(import.meta.url));g=QQ()});import{readFileSync as ZQ}from"fs";import{resolve as zQ,dirname as XQ}from"path";import{fileURLToPath as KQ}from"url";function j$(){if($$!==null)return $$;let $="7.53.0";if(typeof $==="string"&&$.length>0)return $$=$,$$;try{let Q=XQ(KQ(import.meta.url)),Z=o$(Q);$$=ZQ(zQ(Z,"VERSION"),"utf-8").trim()}catch{$$="unknown"}return $$}var $$=null;var a$=L(()=>{b()});var b1={};h(b1,{runOrThrow:()=>qQ,run:()=>k,commandVersion:()=>WQ,commandExists:()=>f,ShellError:()=>s$});async function k($,Q={}){let Z=Bun.spawn({cmd:[...$],stdout:"pipe",stderr:"pipe",env:Q.env?{...process.env,...Q.env}:process.env,cwd:Q.cwd}),z,X;if(Q.timeoutMs&&Q.timeoutMs>0)z=setTimeout(()=>{try{Z.kill("SIGTERM")}catch{}X=setTimeout(()=>{try{Z.kill("SIGKILL")}catch{}},2000)},Q.timeoutMs);try{let[q,K,W]=await Promise.all([new Response(Z.stdout).text(),new Response(Z.stderr).text(),Z.exited]);return{stdout:q,stderr:K,exitCode:W}}finally{if(z)clearTimeout(z);if(X)clearTimeout(X)}}async function qQ($,Q={}){let Z=await k($,Q);if(Z.exitCode!==0)throw new 
s$(`command failed (${Z.exitCode}): ${$.join(" ")}`,Z.exitCode,Z.stdout,Z.stderr);return Z}async function f($){let Q=VQ($),Z=await k(["sh","-c",`command -v ${Q}`],{timeoutMs:5000});if(Z.exitCode===0)return Z.stdout.trim()||null;return null}function VQ($){if(!/^[A-Za-z0-9._/-]+$/.test($))throw Error(`refused to shell-escape suspect token: ${$}`);return $}async function WQ($,Q="--version"){if(!await f($))return null;let z=await k([$,Q],{timeoutMs:5000});if(z.exitCode!==0)return null;return((z.stdout||z.stderr).split(/\r?\n/)[0]?.trim()??"")||null}var s$;var d=L(()=>{s$=class s$ extends Error{message;exitCode;stdout;stderr;constructor($,Q,Z,z){super($);this.message=$;this.exitCode=Q;this.stdout=Z;this.stderr=z;this.name="ShellError"}}});function a($){return JQ?"":$}var JQ,T,S,_,wZ,I,R,y,V;var c=L(()=>{JQ=(process.env.NO_COLOR??"").length>0;T=a("\x1B[0;31m"),S=a("\x1B[0;32m"),_=a("\x1B[1;33m"),wZ=a("\x1B[0;34m"),I=a("\x1B[0;36m"),R=a("\x1B[1m"),y=a("\x1B[2m"),V=a("\x1B[0m")});import{existsSync as wQ}from"fs";async function Q$(){if(G$!==void 0)return G$;let $="/opt/homebrew/bin/python3.12";if(wQ($))return G$=$,$;let Q=await f("python3.12");if(Q)return G$=Q,Q;let Z=await f("python3");return G$=Z,Z}async function Z$($,Q={}){let Z=await Q$();if(!Z)return{stdout:"",stderr:"python3 not found",exitCode:127};return k([Z,"-c",$],Q)}var G$;var q$=L(()=>{d()});var e1={};h(e1,{runStatus:()=>uQ});import{existsSync as v,readFileSync as W$,readdirSync as d1,statSync as o1}from"fs";import{resolve as C,basename as DQ}from"path";import{homedir as CQ}from"os";function n1($){let Q=Math.trunc($);if(Q>=1e6)return`${(Math.trunc(Q/1e6*10)/10).toFixed(1)}M`;if(Q>=1000)return`${(Math.trunc(Q/1000*10)/10).toFixed(1)}K`;return String(Q)}function a1($,Q,Z){if(Q===0)return null;let z=Math.trunc($*100/Q),X=Math.trunc($*k$/Q);if(X>k$)X=k$;let q=k$-X,K=S;if(z>=80)K=T;else if(z>=50)K=_;let W="=".repeat(Math.max(0,X))+" ".repeat(Math.max(0,q)),J=n1($),U=n1(Q);return`  ${R}${Z}${V} ${K}[${W}]${V} ${z}% 
(${J} / ${U})`}async function hQ(){if(await f("jq"))return!0;return process.stdout.write(`${T}Error: jq is required but not installed.${V}
 `),process.stdout.write(`Install with:
 `),process.stdout.write(`  brew install jq    (macOS)
 `),process.stdout.write(`  apt install jq     (Debian/Ubuntu)
@@ -790,4 +790,4 @@ Set LOKI_LEGACY_BASH=1 to force the bash CLI for every command.
 `),2}default:return process.stderr.write(`Unknown command: ${Q}
 `),process.stderr.write(s6),2}}l1();process.on("SIGINT",()=>process.exit(130));process.on("SIGTERM",()=>process.exit(143));var KZ=await XZ(Bun.argv.slice(2));process.exit(KZ);
-//# debugId=8015709BAB9E625464756E2164756E21
+//# debugId=3BF6CF9B99A2BD7E64756E2164756E21

package/magic/core/debate.py CHANGED Viewed

@@ -482,8 +482,10 @@ class DebateRunner:
         if provider == "claude":
             return ["claude", "--dangerously-skip-permissions", "-p", prompt]
         if provider == "codex":
-            # Codex uses `exec --full-auto` with the prompt as positional.
-            return ["codex", "exec", "--full-auto", prompt]
+            # Codex uses `exec --sandbox workspace-write` with the prompt as
+            # positional (codex 0.132.0 deprecated --full-auto; workspace-write
+            # is the documented replacement, exec is non-interactive by default).
+            return ["codex", "exec", "--sandbox", "workspace-write", prompt]
         if provider == "gemini":
             return ["gemini", "--approval-mode=yolo", prompt]
         if provider == "cline":

package/magic/core/generator.py CHANGED Viewed

@@ -180,7 +180,7 @@ class ComponentGenerator:
         if provider == "claude":
             cmd = base_cmd + [binary, "-p", prompt]
         elif provider == "codex":
-            cmd = base_cmd + [binary, "exec", "--full-auto", prompt]
+            cmd = base_cmd + [binary, "exec", "--sandbox", "workspace-write", prompt]
         elif provider == "gemini":
             cmd = base_cmd + [binary, "--approval-mode=yolo", prompt]
         elif provider == "cline":

package/mcp/__init__.py CHANGED Viewed

@@ -57,4 +57,4 @@ try:
 except ImportError:
     __all__ = ['mcp']
-__version__ = '7.51.0'
+__version__ = '7.53.0'

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "loki-mode",
   "mcpName": "io.github.asklokesh/loki-mode",
-  "version": "7.51.0",
+  "version": "7.53.0",
   "description": "Loki Mode by Autonomi. Autonomous spec-to-product system: takes a PRD, GitHub issue, OpenAPI/JSON/YAML, or one-line brief to a deployed app via the RARV-C closure loop with 8 quality gates. Provider-agnostic (Claude Code, OpenAI Codex, Cline, Aider).",
   "keywords": [
     "agent",

package/plugins/loki-mode/.claude-plugin/plugin.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "$schema": "https://json.schemastore.org/claude-code-plugin-manifest.json",
   "name": "loki-mode",
   "displayName": "Loki Mode",
-  "version": "7.51.0",
+  "version": "7.53.0",
   "description": "Autonomous spec-to-product build system with a built-in trust layer (RARV-C closure loop, 8 quality gates, completion council). Ships Loki's spec-hardening, drift-detection, and deterministic PR verification commands plus the Loki MCP server.",
   "author": {
     "name": "Autonomi",

package/providers/codex.sh CHANGED Viewed

@@ -29,10 +29,14 @@ PROVIDER_CLI="codex"
 # CLI Invocation
 # Note: codex uses positional prompt after "exec" subcommand
-# VERIFIED: exec --full-auto confirmed in codex exec --help (v0.98.0)
-# --full-auto: sets --ask-for-approval on-request + --sandbox workspace-write (v0.98.0)
+# VERIFIED: codex 0.132.0 deprecates --full-auto (prints a deprecation warning
+# and the flag is gone from `codex exec --help`). Use --sandbox workspace-write,
+# which is the documented replacement and the sandbox --full-auto expanded to.
+# `codex exec` is the non-interactive subcommand: it runs at approval "never"
+# with no --ask-for-approval flag, so --sandbox workspace-write alone keeps the
+# loop fully autonomous (verified against codex 0.132.0: no approval prompt).
 # Alternative: "exec --dangerously-bypass-approvals-and-sandbox" (legacy, no sandbox)
-PROVIDER_AUTONOMOUS_FLAG="exec --full-auto --skip-git-repo-check"
+PROVIDER_AUTONOMOUS_FLAG="exec --sandbox workspace-write --skip-git-repo-check"
 PROVIDER_PROMPT_FLAG=""
 PROVIDER_PROMPT_POSITIONAL=true
@@ -124,7 +128,7 @@ provider_version() {
 provider_invoke() {
     local prompt="$1"
     shift
-    codex exec --full-auto --skip-git-repo-check \
+    codex exec --sandbox workspace-write --skip-git-repo-check \
         --model "$PROVIDER_MODEL_DEVELOPMENT" \
         "$prompt" "$@"
 }
@@ -182,11 +186,13 @@ resolve_model_for_tier() {
 # Tier-aware invocation.
 #
-# v7.4.18: aligned with codex CLI v0.125.0 (latest as of 2026-04-26).
-# Replaced --full-auto preset with the explicit flags it expands to:
-#   --ask-for-approval never
-#   --sandbox danger-full-access
-# Forward-compatible if the preset is renamed; readable in process listings.
+# Aligned with codex CLI 0.132.0 (verified: --full-auto deprecated/removed
+# from `codex exec --help`). `codex exec` is the non-interactive subcommand and
+# runs at approval "never" with no --ask-for-approval flag, so --sandbox
+# workspace-write alone keeps the loop autonomous (verified: no approval prompt
+# on codex 0.132.0). workspace-write is the documented --full-auto replacement
+# and the safer default (scoped disk writes) over danger-full-access; readable
+# in process listings.
 #
 # Optional env knobs:
 #   LOKI_CODEX_WEB_SEARCH=true      enable codex --search (live web)
@@ -227,8 +233,7 @@ provider_invoke_with_tier() {
     LOKI_CODEX_REASONING_EFFORT="$effort" \
     CODEX_MODEL_REASONING_EFFORT="$effort" \
     codex exec \
-        --ask-for-approval never \
-        --sandbox danger-full-access \
+        --sandbox workspace-write \
         --skip-git-repo-check \
         --model "$model" \
         "${extra_flags[@]}" \

package/references/multi-provider.md CHANGED Viewed

@@ -286,7 +286,7 @@ All CLI flags have been verified against actual CLI help output:
 | Provider | Flag | Verified Version | Notes |
 |----------|------|------------------|-------|
 | Claude | `--dangerously-skip-permissions` | v2.1.34 | Autonomous mode |
-| Codex | `--full-auto` | v0.98.0 | Recommended; legacy: `exec --dangerously-bypass-approvals-and-sandbox` |
+| Codex | `--sandbox workspace-write` | v0.132.0 | Recommended (--full-auto deprecated 0.125+); legacy: `exec --dangerously-bypass-approvals-and-sandbox` |
 | Cline | `--auto-approve` | latest | Autonomous mode |
 | Aider | `--yes-always` | latest | Autonomous mode |

package/skills/model-selection.md CHANGED Viewed

@@ -231,13 +231,16 @@ Claude models support an `effort` parameter that controls reasoning depth withou
 **Note:** The effort parameter and thinking prefixes serve different purposes. Effort controls the model's internal reasoning budget; thinking prefixes guide the structure of the response.
-### Codex --full-auto Flag
+### Codex --sandbox workspace-write Flag
-Codex CLI v0.98.0 supports `--full-auto` as the recommended autonomous mode flag, replacing the verbose `exec --dangerously-bypass-approvals-and-sandbox` invocation:
+Codex CLI deprecated `--full-auto` in v0.125+ (removed from `codex exec --help`,
+emits a deprecation warning if used). The documented replacement is
+`--sandbox workspace-write`. The `exec` subcommand is non-interactive by default
+(approval: never), so the sandbox flag alone keeps the loop autonomous:
 ```bash
-# Recommended (v0.98.0+)
-codex --full-auto "$prompt"
+# Recommended (codex 0.125+)
+codex exec --sandbox workspace-write "$prompt"
 # Legacy (still supported)
 codex exec --dangerously-bypass-approvals-and-sandbox "$prompt"

package/skills/providers.md CHANGED Viewed

@@ -6,7 +6,7 @@ Loki Mode supports four AI providers for autonomous execution.
 > **CLI Flags Verified:** The autonomous mode flags have been verified against actual CLI help output:
 > - Claude: `--dangerously-skip-permissions` (verified)
-> - Codex: `exec --full-auto --skip-git-repo-check` (the harness invocation; --skip-git-repo-check required on fresh non-git dirs) or `exec --dangerously-bypass-approvals-and-sandbox` (legacy)
+> - Codex: `exec --sandbox workspace-write --skip-git-repo-check` (the harness invocation; --skip-git-repo-check required on fresh non-git dirs; --full-auto deprecated in codex 0.125+, workspace-write is the documented replacement) or `exec --dangerously-bypass-approvals-and-sandbox` (legacy)
 | Feature | Claude Code | OpenAI Codex | Cline CLI | Aider |
 |---------|-------------|--------------|-----------|-------|
@@ -70,7 +70,7 @@ Task(model="haiku", ...)   # Fast tier (parallelize)
 **Invocation:**
 ```bash
 # Recommended (v0.98.0+)
-codex exec --full-auto --skip-git-repo-check "$prompt"
+codex exec --sandbox workspace-write --skip-git-repo-check "$prompt"
 # Legacy (still supported)
 codex exec --dangerously-bypass-approvals-and-sandbox "$prompt"

package/skills/quality-gates.md CHANGED Viewed

@@ -2,12 +2,14 @@
 **Never ship code without passing all quality gates.**
-## The 8 Quality Gates
+## The Quality Gates (8 default-on + 1 opt-in)
-Every gate below is wired into the orchestration loop (`autonomy/run.sh`) and
-blocks completion when it fails. The table lists exactly what each gate detects,
-what it does NOT detect (so you never over-trust a green gate), its opt-out flag,
-and its blocking behavior. Transcribe this list verbatim; do not recompute it.
+Every gate below is wired into the orchestration loop (`autonomy/run.sh`). The 8
+numbered gates are default-on and block completion when they fail; the opt-in
+gate (marked below) is default-OFF and runs only when its flag is set. The table
+lists exactly what each gate detects, what it does NOT detect (so you never
+over-trust a green gate), its opt-out flag, and its blocking behavior. Transcribe
+this list verbatim; do not recompute it.
 | # | Gate | Detects | Does NOT detect | Blocking | Opt-out flag |
 |---|------|---------|-----------------|----------|--------------|
@@ -19,6 +21,7 @@ and its blocking behavior. Transcribe this list verbatim; do not recompute it.
 | 6 | Test Mutation Detector | Assertion-value churn alongside implementation changes (test-fitting), low assertion density (`tests/detect-test-mutations.sh`); HIGH blocks | Logically-correct-but-weak assertions | Yes (HIGH blocks) | `LOKI_GATE_MUTATION=false` |
 | 7 | Documentation Coverage | README presence, docs freshness within 10 commits, API docs for exported symbols in packages | Whether the docs are accurate or useful | Yes | `LOKI_GATE_DOC_COVERAGE=false` |
 | 8 | Magic Modules Debate | Spec-vs-implementation debate findings on generated Magic Modules; BLOCK-severity findings block | Issues outside the Magic Modules debate scope | Yes (BLOCK severity) | `LOKI_GATE_MAGIC_DEBATE=false` |
+| 9 (opt-in, default OFF) | Semantic Test-Authenticity | Fake tests that look real but verify nothing (literal-via-variable echo, mock-return echo, deleted assertions) that gates 5+6 miss (`tests/detect-semantic-test-problems.sh --block-high`); CRITICAL/HIGH block | Deep dataflow, legitimate computed-literal assertions, Python/shell tests (JS/TS only); MED/LOW are advisory | Only when enabled, and only on CRITICAL/HIGH; runs solely on a completion claim | Opt-IN: `LOKI_GATE_SEMANTIC_TESTS=true` to enable (default off = not invoked, never blocks) |
 **Severity-based blocking** ties the review gates together: any Critical or High
 finding blocks completion. Medium, Low, and cosmetic findings are advisory and

package/src/audit/index.js CHANGED Viewed

@@ -83,6 +83,84 @@ function exportReport(type, opts) {
   return compliance.exportReportJson(report);
 }
+/**
+ * Generate a compliance report as a plain object, with the agent-chain
+ * tamper-evidence verdict folded in.
+ *
+ * This is the object form intended for surfaces (e.g. the dashboard
+ * /api/compliance endpoint) that need the report as data rather than a
+ * pre-serialized string. It always reflects the REAL audit chain:
+ *
+ *   - The report body is generated from the live audit entries
+ *     (`_log.readEntries()`), never fabricated.
+ *   - `chainIntegrity` is populated from `verifyChain()` so the report
+ *     carries the true tamper-evidence state of the underlying chain.
+ *     For the SOC2 report this fills the `chainIntegrity: null` slot the
+ *     generator leaves for the caller; for the other report types it is
+ *     attached under the same key for a uniform surface contract.
+ *
+ * When the chain has no entries the report is still returned honestly
+ * with `totalAuditEntries: 0` (an empty-but-valid report), never a
+ * fabricated "compliant" verdict.
+ *
+ * @param {string} type - 'soc2', 'iso27001', or 'gdpr'
+ * @param {object} [opts] - Report options (projectName, period, etc.)
+ * @returns {object} The compliance report object with chainIntegrity set.
+ */
+function getReport(type, opts) {
+  if (!_initialized) init();
+  var report = generateReport(type, opts);
+  // Fold the real tamper-evidence verdict into the report. Do not let a
+  // verification error fabricate a pass: capture it honestly instead.
+  try {
+    report.chainIntegrity = _log.verifyChain();
+  } catch (e) {
+    report.chainIntegrity = {
+      valid: false,
+      entries: report.totalAuditEntries || 0,
+      brokenAt: null,
+      error: 'chain verification failed: ' + String((e && e.message) || e),
+    };
+  }
+  return report;
+}
+/**
+ * CLI shim so a non-Node surface (e.g. the Python dashboard) can fetch a
+ * compliance report for a given project directory as JSON on stdout.
+ *
+ * This mirrors the inverse of dashboard/audit.py's `_unified_cli()`
+ * (which lets the Node-side unified verifier read the Python chain).
+ *
+ * Invoked as:
+ *   node src/audit/index.js report <type> <projectDir>
+ *
+ * <type> is one of soc2 | iso27001 | gdpr. <projectDir> is the project
+ * root whose .loki/audit/audit.jsonl chain is read. Prints a single JSON
+ * object to stdout. Returns exit 0 on success, 2 on usage error.
+ *
+ * The report is generated from the REAL chain; an absent/empty chain
+ * yields an honest empty report (totalAuditEntries: 0), not a fake pass.
+ */
+function _cli(argv) {
+  var args = argv || [];
+  var VALID_TYPES = { soc2: true, iso27001: true, gdpr: true };
+  if (args.length < 2 || args[0] !== 'report' || !VALID_TYPES[args[1]]) {
+    process.stdout.write(JSON.stringify({
+      error: 'usage: index.js report {soc2|iso27001|gdpr} <projectDir>',
+    }) + '\n');
+    return 2;
+  }
+  var type = args[1];
+  var projectDir = args[2] || process.cwd();
+  destroy();
+  init(projectDir);
+  var report = getReport(type);
+  destroy();
+  process.stdout.write(JSON.stringify(report) + '\n');
+  return 0;
+}
 /**
  * Check if a provider is allowed by data residency policy.
  */
@@ -167,6 +245,7 @@ module.exports = {
   verifyChain: verifyChain,
   generateReport: generateReport,
   exportReport: exportReport,
+  getReport: getReport,
   checkProvider: checkProvider,
   isAirGapped: isAirGapped,
   readEntries: readEntries,
@@ -177,3 +256,8 @@ module.exports = {
   verifyUnified: verifyUnified,
   writeWitness: writeWitness,
 };
+// CLI entry point: `node src/audit/index.js report <type> <projectDir>`.
+if (require.main === module) {
+  process.exit(_cli(process.argv.slice(2)));
+}