loki-mode 7.51.0 → 7.53.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/SKILL.md CHANGED
@@ -3,7 +3,7 @@ name: loki-mode
3
3
  description: Autonomous spec-driven build system with a built-in trust layer. It does not call work done until it is verified (RARV-C closure loop, 8 quality gates, completion council, verified-completion evidence gate). Triggers on "Loki Mode". Takes a spec (PRD, GitHub issue, OpenAPI doc, etc.) to deployed product with minimal human intervention. Provider-agnostic. Requires --dangerously-skip-permissions flag.
4
4
  ---
5
5
 
6
- # Loki Mode v7.51.0
6
+ # Loki Mode v7.53.0
7
7
 
8
8
  **You are an autonomous agent. You make decisions. You do not ask questions. You do not stop.**
9
9
 
@@ -407,4 +407,4 @@ See `CHANGELOG.md` entries [7.5.7], [7.5.8], [7.5.13] for the per-fix list and r
407
407
 
408
408
  ---
409
409
 
410
- **v7.51.0 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
410
+ **v7.53.0 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
package/VERSION CHANGED
@@ -1 +1 @@
1
- 7.51.0
1
+ 7.53.0
@@ -2038,7 +2038,7 @@ ISSUES: CRITICAL:description (optional, one per line per issue)"
2038
2038
  ;;
2039
2039
  codex)
2040
2040
  if command -v codex &>/dev/null; then
2041
- verdict=$(codex exec --full-auto "$prompt" 2>/dev/null)
2041
+ verdict=$(codex exec --sandbox workspace-write "$prompt" 2>/dev/null)
2042
2042
  fi
2043
2043
  ;;
2044
2044
  gemini)
@@ -2139,7 +2139,7 @@ REASON: your reasoning"
2139
2139
  ;;
2140
2140
  codex)
2141
2141
  if command -v codex &>/dev/null; then
2142
- verdict=$(codex exec --full-auto "$prompt" 2>/dev/null)
2142
+ verdict=$(codex exec --sandbox workspace-write "$prompt" 2>/dev/null)
2143
2143
  fi
2144
2144
  ;;
2145
2145
  gemini)
package/autonomy/grill.sh CHANGED
@@ -227,7 +227,7 @@ grill_invoke_provider() {
227
227
  return $GRILL_EXIT_ERROR
228
228
  fi
229
229
  local out
230
- out="$(printf '%s' "$prompt" | _grill_with_timeout "${LOKI_GRILL_TIMEOUT:-180}" codex exec --full-auto - 2>/dev/null)"
230
+ out="$(printf '%s' "$prompt" | _grill_with_timeout "${LOKI_GRILL_TIMEOUT:-180}" codex exec --sandbox workspace-write - 2>/dev/null)"
231
231
  if [ -z "$out" ]; then
232
232
  _grill_err "provider returned no output"
233
233
  return $GRILL_EXIT_ERROR
@@ -63,31 +63,35 @@ loki_remaining_budget() {
63
63
  local budget_file="${TARGET_DIR:-.}/.loki/metrics/budget.json"
64
64
  local spend="0"
65
65
  if [ -f "$budget_file" ]; then
66
- spend=$(python3 -c "
67
- import json, sys
66
+ # Pass the path via env var (os.environ), NOT string interpolation, so a
67
+ # path containing a single quote (or other python/shell-breaking char)
68
+ # cannot break the parse. Single-quoted program -> bash interpolates nothing.
69
+ spend=$(_LOKI_BUDGET_FILE="$budget_file" python3 -c '
70
+ import json, os, sys
68
71
  try:
69
- with open('$budget_file') as f:
72
+ with open(os.environ["_LOKI_BUDGET_FILE"]) as f:
70
73
  d = json.load(f)
71
- v = d.get('current_spend', 0)
74
+ v = d.get("current_spend", 0)
72
75
  print(float(v))
73
76
  except Exception:
74
77
  print(0)
75
- " 2>/dev/null)
78
+ ' 2>/dev/null)
76
79
  fi
77
80
  # Compute remaining via python3 (bash floats are unreliable across awk/bc variations).
78
- python3 -c "
79
- import sys
81
+ # Pass limit/spend via env vars too (same hardening; single-quoted program).
82
+ _LOKI_BUDGET_LIMIT="$limit" _LOKI_BUDGET_SPEND="$spend" python3 -c '
83
+ import os, sys
80
84
  try:
81
- limit = float('$limit')
82
- spend = float('$spend')
85
+ limit = float(os.environ["_LOKI_BUDGET_LIMIT"])
86
+ spend = float(os.environ["_LOKI_BUDGET_SPEND"])
83
87
  rem = limit - spend
84
88
  # Strictly positive; otherwise emit nothing (caller decides whether to bail or warn).
85
89
  if rem > 0:
86
90
  # Round to 2 decimal places for the CLI.
87
- print(f'{rem:.2f}')
91
+ print(f"{rem:.2f}")
88
92
  except Exception:
89
93
  pass
90
- " 2>/dev/null
94
+ ' 2>/dev/null
91
95
  }
92
96
 
93
97
  # ---------- Fallback model ----------
@@ -57,7 +57,7 @@ def invoke_llm(prompt, timeout=120):
57
57
 
58
58
  cmds = {
59
59
  "claude": ["claude", "-p", prompt],
60
- "codex": ["codex", "exec", "--full-auto", prompt],
60
+ "codex": ["codex", "exec", "--sandbox", "workspace-write", prompt],
61
61
  "cline": ["cline", "-y", prompt],
62
62
  "aider": ["aider", "--message", prompt, "--yes-always", "--no-auto-commits"],
63
63
  }
package/autonomy/loki CHANGED
@@ -743,7 +743,7 @@ show_help() {
743
743
  echo " --complex Force complex complexity tier (8 phases)"
744
744
  echo " --github Enable GitHub issue import"
745
745
  echo " --no-dashboard Disable web dashboard"
746
- echo " --sandbox Run in Docker sandbox for isolation"
746
+ echo " --sandbox Run in Docker sandbox for isolation (default: off; requires Docker)"
747
747
  echo " --skip-memory Skip loading memory context at startup"
748
748
  echo " --fresh-prd Regenerate the PRD from the codebase (no-PRD runs; ignores the reusable generated PRD)"
749
749
  echo " --compliance PRESET Enable compliance mode (default|healthcare|fintech|government)"
@@ -1063,7 +1063,7 @@ cmd_start() {
1063
1063
  echo " --github Enable GitHub issue import"
1064
1064
  echo " --no-dashboard Disable web dashboard"
1065
1065
  echo " --api Start dashboard API server alongside the build"
1066
- echo " --sandbox Run in Docker sandbox"
1066
+ echo " --sandbox Run in Docker sandbox (default: off; requires Docker)"
1067
1067
  echo " --skip-memory Skip loading memory context at startup"
1068
1068
  echo " --fresh-prd Regenerate the PRD from the codebase on a no-PRD run"
1069
1069
  echo " (ignores the reusable generated PRD; aliases: --regen-prd,"
@@ -3785,7 +3785,7 @@ cmd_provider_info() {
3785
3785
  echo "Name: Codex CLI"
3786
3786
  echo "Vendor: OpenAI"
3787
3787
  echo "CLI: codex"
3788
- echo "Flag: --full-auto"
3788
+ echo "Flag: --sandbox workspace-write"
3789
3789
  echo ""
3790
3790
  echo "Features:"
3791
3791
  echo " - Autonomous mode"
@@ -5680,7 +5680,7 @@ cmd_run() {
5680
5680
  echo " --simple Force simple complexity tier"
5681
5681
  echo " --complex Force complex complexity tier"
5682
5682
  echo " --no-dashboard Disable web dashboard"
5683
- echo " --sandbox Run in Docker sandbox"
5683
+ echo " --sandbox Run in Docker sandbox (default: off; requires Docker)"
5684
5684
  echo " --no-plan Skip auto-shown PRD analysis at startup"
5685
5685
  echo " --budget USD Set cost budget limit"
5686
5686
  echo ""
@@ -11641,7 +11641,7 @@ except Exception: pass
11641
11641
  done; } && phase_exit=0 || phase_exit=$?
11642
11642
  ;;
11643
11643
  codex)
11644
- (cd "$codebase_path" && codex exec --full-auto "$phase_prompt" 2>&1) || phase_exit=$?
11644
+ (cd "$codebase_path" && codex exec --sandbox workspace-write "$phase_prompt" 2>&1) || phase_exit=$?
11645
11645
  ;;
11646
11646
  cline)
11647
11647
  (cd "$codebase_path" && cline -y "$phase_prompt" 2>&1) || phase_exit=$?
@@ -11814,7 +11814,7 @@ except Exception: pass
11814
11814
  done; } && doc_exit=0 || doc_exit=$?
11815
11815
  ;;
11816
11816
  codex)
11817
- (cd "$codebase_path" && codex exec --full-auto "$doc_prompt" 2>&1) || doc_exit=$?
11817
+ (cd "$codebase_path" && codex exec --sandbox workspace-write "$doc_prompt" 2>&1) || doc_exit=$?
11818
11818
  ;;
11819
11819
  cline)
11820
11820
  (cd "$codebase_path" && cline -y "$doc_prompt" 2>&1) || doc_exit=$?
@@ -12445,7 +12445,7 @@ except Exception: pass
12445
12445
  done && heal_exit=0 || heal_exit=$?
12446
12446
  ;;
12447
12447
  codex)
12448
- (cd "$codebase_path" && codex exec --full-auto "$heal_prompt" 2>&1) || heal_exit=$?
12448
+ (cd "$codebase_path" && codex exec --sandbox workspace-write "$heal_prompt" 2>&1) || heal_exit=$?
12449
12449
  ;;
12450
12450
  cline)
12451
12451
  (cd "$codebase_path" && cline -y "$heal_prompt" 2>&1) || heal_exit=$?
@@ -22069,7 +22069,7 @@ USER TASK: ${prompt}"
22069
22069
  claude -p "$full_prompt" 2>&1 || agent_exit=$?
22070
22070
  ;;
22071
22071
  codex)
22072
- codex exec --full-auto "$full_prompt" 2>&1 || agent_exit=$?
22072
+ codex exec --sandbox workspace-write "$full_prompt" 2>&1 || agent_exit=$?
22073
22073
  ;;
22074
22074
  cline)
22075
22075
  cline -y "$full_prompt" 2>&1 || agent_exit=$?
@@ -22200,7 +22200,7 @@ $diff"
22200
22200
 
22201
22201
  case "$provider" in
22202
22202
  claude) claude -p "$review_prompt" 2>&1 ;;
22203
- codex) codex exec --full-auto "$review_prompt" 2>&1 ;;
22203
+ codex) codex exec --sandbox workspace-write "$review_prompt" 2>&1 ;;
22204
22204
  cline) cline -y "$review_prompt" 2>&1 ;;
22205
22205
  *) echo -e "${RED}Unknown provider: $provider${NC}"; return 1 ;;
22206
22206
  esac
@@ -23870,7 +23870,7 @@ _docs_invoke_provider() {
23870
23870
  result=$($t_prefix env CAVEMAN_DEFAULT_MODE=off claude -p "$prompt" 2>/dev/null) || exit_code=$?
23871
23871
  ;;
23872
23872
  codex)
23873
- result=$($t_prefix codex exec --full-auto "$prompt" 2>/dev/null) || exit_code=$?
23873
+ result=$($t_prefix codex exec --sandbox workspace-write "$prompt" 2>/dev/null) || exit_code=$?
23874
23874
  ;;
23875
23875
  cline)
23876
23876
  result=$($t_prefix cline -y "$prompt" 2>/dev/null) || exit_code=$?
@@ -422,7 +422,11 @@ checklist_should_verify() {
422
422
  # non-cooperative agent with filesystem tools can read the reservation directly.
423
423
  #
424
424
  # Selection is idempotent and reproducible: count = clamp(round(0.25*N), 1, 5)
425
- # for N>=4 items; ordering by sha256 of each item's "id" (stable, not random).
425
+ # for N>=2 items; ordering by sha256 of each item's "id" (stable, not random).
426
+ # Small checklists (2 <= N < 4) reserve exactly 1 held-out item via the same
427
+ # sha256-rank selection (the clamp floor of 1 guarantees coverage), so a small
428
+ # spec's checklist is never fully gameable. N<2 is a no-op: holding out the only
429
+ # item of a 1-item checklist would leave nothing to verify against in the loop.
426
430
  # Written once to .loki/checklist/held-out.json; never overwritten if present.
427
431
  checklist_select_heldout() {
428
432
  local heldout_file="${CHECKLIST_DIR:-".loki/checklist"}/held-out.json"
@@ -442,7 +446,7 @@ checklist_select_heldout() {
442
446
  # PARTIAL kept=k dropped=d - some prior ids survived; we keep only survivors
443
447
  # DUP_SKIP - current checklist ids are not unique; the id-based
444
448
  # mechanism is unsound, so we reserve nothing (MEDIUM-2)
445
- # NOOP - n<4 with no prior file, or other no-write outcome
449
+ # NOOP - n<2 with no prior file, or other no-write outcome
446
450
  # Honest caveat: re-selection or partial-survival after a regen can reserve
447
451
  # items the build loop already saw in earlier prompts (the hidden-from-loop
448
452
  # guarantee is best-effort once the checklist ids change mid-run).
@@ -512,7 +516,7 @@ if os.path.exists(out_path):
512
516
 
513
517
  if prior is not None:
514
518
  prior_ids = [i for i in prior.get('held_out', []) if i]
515
- # A prior reservation of [] (e.g. an earlier n<4 run) is a valid no-op state;
519
+ # A prior reservation of [] (e.g. an earlier n<2 run) is a valid no-op state;
516
520
  # keep it idempotent rather than re-selecting now that n may have grown.
517
521
  if not prior_ids:
518
522
  print('IDEMPOTENT')
@@ -525,9 +529,11 @@ if prior is not None:
525
529
  if not survivors:
526
530
  # Fully stale: the checklist regenerated and orphaned the reservation.
527
531
  # Deterministically re-select from the CURRENT checklist.
528
- if n < 4:
532
+ if n < 2:
533
+ # N<2: cannot hold out from a 1-item checklist (reserving the only
534
+ # item leaves nothing to verify against). No-op write of an empty set.
529
535
  atomic_write({'held_out': [], 'total_items': n,
530
- 'note': 'n<4: no held-out reserved (re-selected after stale reservation)'})
536
+ 'note': 'n<2: no held-out reserved (re-selected after stale reservation)'})
531
537
  print('RESELECTED 0')
532
538
  sys.exit(0)
533
539
  held = fresh_selection()
@@ -542,11 +548,15 @@ if prior is not None:
542
548
  sys.exit(0)
543
549
 
544
550
  # No prior reservation: first selection.
545
- if n < 4:
546
- # N>=4 gate: smaller checklists get no held-out (nothing to hide reliably).
547
- atomic_write({'held_out': [], 'total_items': n, 'note': 'n<4: no held-out reserved'})
551
+ if n < 2:
552
+ # N<2 gate: a 1-item (or empty) checklist cannot meaningfully hold out an
553
+ # item -- reserving the only item would leave nothing to verify against in
554
+ # the build loop. Write an empty set so downstream reads stay well-formed.
555
+ atomic_write({'held_out': [], 'total_items': n, 'note': 'n<2: no held-out reserved'})
548
556
  print('NOOP')
549
557
  sys.exit(0)
558
+ # For 2 <= N < 4, fresh_selection() reserves exactly 1 item (select_count clamps
559
+ # round(0.25*N) up to a floor of 1), so small specs are never fully gameable.
550
560
 
551
561
  held = fresh_selection()
552
562
  atomic_write({'held_out': held, 'total_items': n})
package/autonomy/run.sh CHANGED
@@ -585,7 +585,7 @@ BACKGROUND_MODE=${LOKI_BACKGROUND:-false} # Run in background
585
585
  STAGED_AUTONOMY=${LOKI_STAGED_AUTONOMY:-false} # Require plan approval
586
586
  AUDIT_LOG_ENABLED=${LOKI_AUDIT_LOG:-true} # Enable audit logging (on by default)
587
587
  MAX_PARALLEL_AGENTS=${LOKI_MAX_PARALLEL_AGENTS:-10} # Limit concurrent agents
588
- SANDBOX_MODE=${LOKI_SANDBOX_MODE:-false} # Docker sandbox mode
588
+ SANDBOX_MODE=${LOKI_SANDBOX_MODE:-false} # Docker sandbox mode (informational; the real dispatch reads LOKI_SANDBOX_MODE at autonomy/loki:1965 and execs sandbox.sh -- this var is not consumed in run.sh)
589
589
  ALLOWED_PATHS=${LOKI_ALLOWED_PATHS:-""} # Empty = all paths allowed
590
590
  BLOCKED_COMMANDS=${LOKI_BLOCKED_COMMANDS:-"rm -rf /,dd if=,mkfs,:(){ :|:& };:"}
591
591
 
@@ -3264,7 +3264,7 @@ spawn_worktree_session() {
3264
3264
  fi
3265
3265
  ;;
3266
3266
  codex)
3267
- codex exec --full-auto --skip-git-repo-check \
3267
+ codex exec --sandbox workspace-write --skip-git-repo-check \
3268
3268
  "Loki Mode: $task_prompt. Read .loki/CONTINUITY.md for context." \
3269
3269
  >> "$log_file" 2>&1 || _wt_exit=$?
3270
3270
  ;;
@@ -3480,7 +3480,7 @@ Output ONLY the resolved file content with no conflict markers. No explanations.
3480
3480
  resolution=$(CAVEMAN_DEFAULT_MODE=off claude "${_cr_argv[@]}" -p "$conflict_prompt" --output-format text 2>/dev/null)
3481
3481
  ;;
3482
3482
  codex)
3483
- resolution=$(codex exec --full-auto --skip-git-repo-check "$conflict_prompt" 2>/dev/null)
3483
+ resolution=$(codex exec --sandbox workspace-write --skip-git-repo-check "$conflict_prompt" 2>/dev/null)
3484
3484
  ;;
3485
3485
  cline)
3486
3486
  resolution=$(invoke_cline_capture "$conflict_prompt" 2>/dev/null)
@@ -6199,7 +6199,7 @@ check_command_allowed() {
6199
6199
  # run.sh does not directly execute arbitrary shell commands from user or agent
6200
6200
  # input. Command execution is handled by the AI CLI's own permission model:
6201
6201
  # - Claude Code: --dangerously-skip-permissions (with its own allowlist)
6202
- # - Codex CLI: --full-auto or exec --dangerously-bypass-approvals-and-sandbox
6202
+ # - Codex CLI: exec --sandbox workspace-write or exec --dangerously-bypass-approvals-and-sandbox
6203
6203
  #
6204
6204
  # HUMAN_INPUT.md content is injected as a text prompt to the AI agent (not
6205
6205
  # executed as a shell command), and is already guarded by:
@@ -8313,6 +8313,89 @@ enforce_mutation_integrity() {
8313
8313
  return 0
8314
8314
  }
8315
8315
 
8316
+ # ============================================================================
8317
+ # Semantic Test-Authenticity Gate (P1-3): wire tests/detect-semantic-test-problems.sh
8318
+ # as an OPT-IN completion gate. The detector catches the harder class of fake
8319
+ # tests that the regex detectors (gates 5+6) miss: assertions that look real but
8320
+ # verify nothing because the asserted value never flows through code under test
8321
+ # (literal-via-variable echo HIGH, mock-return echo MED, deleted assertions MED).
8322
+ #
8323
+ # ADVISORY-FIRST POSTURE (no-deadlock contract): this helper is invoked ONLY when
8324
+ # LOKI_GATE_SEMANTIC_TESTS=true (the elif guard at the completion-promise arm
8325
+ # short-circuits when off, so there is zero runtime cost on the default path).
8326
+ # When on, it runs the detector with --block-high (clean exit-code contract:
8327
+ # rc 2 iff a CRITICAL/HIGH finding exists). We surface ALL severities to a
8328
+ # findings file (advisory) and return nonzero ONLY on rc 2. Every other exit --
8329
+ # rc 0 (clean), rc 124 (timeout), detector absent, no test files, malformed
8330
+ # output -- returns 0 (pass/fall-through), so the autonomous loop can NEVER
8331
+ # deadlock on a clean run. Mirrors enforce_mock_integrity's invocation
8332
+ # (cd TARGET_DIR + LOKI_SCAN_DIR=TARGET_DIR + timeout), swapping --strict for
8333
+ # --block-high and deciding on the rc-2 contract instead of grepping stdout.
8334
+ # ============================================================================
8335
enforce_semantic_integrity() {
    # Run the semantic test-authenticity detector with --block-high and map its
    # exit status onto the gate contract:
    #   returns 1  only when the detector exits 2 (CRITICAL/HIGH finding present)
    #   returns 0  for every other outcome (clean, timeout, detector missing,
    #              unexpected exit status), so the caller never blocks on an
    #              inconclusive run.
    # Findings are written to <TARGET_DIR>/.loki/quality/semantic-findings.txt,
    # and the file is removed when there is nothing to report.
    local loki_dir="${TARGET_DIR:-.}/.loki"
    local quality_dir="$loki_dir/quality"
    mkdir -p "$quality_dir"
    local findings_file="$quality_dir/semantic-findings.txt"
    local detector="$SCRIPT_DIR/../tests/detect-semantic-test-problems.sh"
    local gate_timeout="${LOKI_GATE_TIMEOUT:-300}"

    if [ ! -f "$detector" ]; then
        log_info "Semantic test gate: detector not found, skipping (inconclusive)"
        rm -f "$findings_file" 2>/dev/null || true
        return 0
    fi

    # `local` is kept on its own line so that $? below is the exit status of
    # the command substitution rather than of the `local` builtin.
    local output rc
    # --block-high exits 2 iff CRITICAL/HIGH present; 0 otherwise (clean wrapper).
    output=$(cd "${TARGET_DIR:-.}" && LOKI_SCAN_DIR="${TARGET_DIR:-.}" \
        timeout "$gate_timeout" bash "$detector" --block-high 2>&1)
    rc=$?

    # timeout exit 124 -- inconclusive, never block on a hang (deny-filter)
    if [ "$rc" -eq 124 ]; then
        log_warn "Semantic test gate: detector timed out after ${gate_timeout}s -- inconclusive"
        rm -f "$findings_file" 2>/dev/null || true
        return 0
    fi

    if [ "$rc" -eq 2 ]; then
        # rc 2 == one or more CRITICAL/HIGH findings. Persist per-finding text
        # (all severities, so the advisory ones travel with the blocking ones).
        {
            echo "# Semantic test-authenticity findings (CRITICAL/HIGH block this completion)"
            grep -E '\[(CRITICAL|HIGH|MEDIUM|LOW)\]' <<<"$output" || true
        } > "$findings_file"
        log_warn "Semantic test gate: CRITICAL/HIGH fake-test problems detected -- BLOCK"
        return 1
    fi

    # Any remaining status is non-blocking. Route MED/LOW advisory findings to
    # the injection file, else clear it.
    local med_low
    med_low=$(grep -E '\[(MEDIUM|LOW)\]' <<<"$output" || true)
    if [ -n "$med_low" ]; then
        {
            echo "# Semantic test advisory findings (MED/LOW, non-blocking)"
            echo "$med_low"
        } > "$findings_file"
    else
        rm -f "$findings_file" 2>/dev/null || true
    fi

    if [ "$rc" -eq 0 ]; then
        log_info "Semantic test gate: PASS"
    else
        # Not 0/2/124: the detector did not deliver a verdict (for example the
        # shell reports 127 when `timeout` is not on PATH, or the `cd` into
        # TARGET_DIR failed). Stay non-blocking, but do not claim a PASS.
        log_warn "Semantic test gate: detector exited with unexpected status $rc -- inconclusive, not blocking"
    fi
    return 0
}
8387
+
8388
+ # P1-3 wrapper that runs the semantic gate and returns its exact rc, mirroring
8389
+ # _evidence_gate_and_surface so the completion-promise elif arm reads cleanly
8390
+ # (`! _semantic_gate_and_surface`). Returns nonzero ONLY when enforce_semantic_integrity
8391
+ # saw an rc-2 (CRITICAL/HIGH) result; all deny-filter cases already collapse to 0
8392
+ # inside enforce_semantic_integrity, so this never blocks a clean run.
8393
_semantic_gate_and_surface() {
    # Run the semantic gate and hand its exit status back unchanged, so the
    # caller can negate it directly in a conditional.
    local gate_status
    if enforce_semantic_integrity; then
        gate_status=0
    else
        # Inside the else branch $? still holds the status of the condition.
        gate_status=$?
    fi
    return "$gate_status"
}
8398
+
8316
8399
  # ============================================================================
8317
8400
  # 3-Reviewer Parallel Code Review (v5.35.0)
8318
8401
  # Specialist pool from skills/quality-gates.md with blind review
@@ -8637,7 +8720,7 @@ _dispatch_reviewer() {
8637
8720
  --output-format text > "$review_output" 2>/dev/null
8638
8721
  ;;
8639
8722
  codex)
8640
- codex exec --full-auto --skip-git-repo-check "$prompt_text" \
8723
+ codex exec --sandbox workspace-write --skip-git-repo-check "$prompt_text" \
8641
8724
  > "$review_output" 2>/dev/null
8642
8725
  ;;
8643
8726
  cline)
@@ -9361,7 +9444,7 @@ ADVERSARIAL_EOF
9361
9444
  ;;
9362
9445
  codex)
9363
9446
  if command -v codex &>/dev/null; then
9364
- codex exec --full-auto --skip-git-repo-check "$adversarial_prompt" \
9447
+ codex exec --sandbox workspace-write --skip-git-repo-check "$adversarial_prompt" \
9365
9448
  > "$result_file" 2>/dev/null || true
9366
9449
  fi
9367
9450
  ;;
@@ -12248,6 +12331,23 @@ if d.get('blocked'):
12248
12331
  gate_failure_context="${gate_failure_context}FIX THESE ISSUES BEFORE PROCEEDING WITH NEW WORK."
12249
12332
  fi
12250
12333
 
12334
+ # P1-3: surface specific semantic test-authenticity findings (which fake test,
12335
+ # which line) when the opt-in gate (LOKI_GATE_SEMANTIC_TESTS) wrote them, so a
12336
+ # block converges: the agent gets the exact files/lines to fix rather than a
12337
+ # bare gate name. The file exists only when the gate ran AND found something
12338
+ # (cleared on clean), so this is zero-cost on the default path and when off.
12339
+ # Mirrors the static-analysis/test-results detail-surfacing above. Surfaced
12340
+ # whether the run blocked (CRIT/HIGH) or only advised (MED/LOW): both inform
12341
+ # the next iteration. Independent of gate-failures.txt presence (the
12342
+ # completion-promise arm does not append a gate token).
12343
+ if [ -f "${TARGET_DIR:-.}/.loki/quality/semantic-findings.txt" ]; then
12344
+ local sem_findings
12345
+ sem_findings=$(grep -E '\[(CRITICAL|HIGH|MEDIUM|LOW)\]' "${TARGET_DIR:-.}/.loki/quality/semantic-findings.txt" 2>/dev/null | head -20 || true)
12346
+ if [ -n "$sem_findings" ]; then
12347
+ gate_failure_context="${gate_failure_context} SEMANTIC TEST-AUTHENTICITY FINDINGS (fix the fake tests; an assertion must verify a value that flows through the code under test, not echo a literal back): ${sem_findings}"
12348
+ fi
12349
+ fi
12350
+
12251
12351
  # P2-2: high-severity spec-assumption context. When DISCOVERY recorded any
12252
12352
  # high-severity assumption (the spec was ambiguous in a high-impact place),
12253
12353
  # surface it to the build agent so it implements with the gap in view (or
@@ -14717,7 +14817,7 @@ if __name__ == "__main__":
14717
14817
  # Uses dynamic tier from RARV phase (tier_param already set above)
14718
14818
  { LOKI_CODEX_REASONING_EFFORT="$tier_param" \
14719
14819
  CODEX_MODEL_REASONING_EFFORT="$tier_param" \
14720
- codex exec --full-auto --skip-git-repo-check \
14820
+ codex exec --sandbox workspace-write --skip-git-repo-check \
14721
14821
  "$prompt" 2>&1 | tee -a "$log_file" "$agent_log" "$iter_output"; \
14722
14822
  } && exit_code=0 || exit_code=$?
14723
14823
  ;;
@@ -15347,6 +15447,20 @@ else:
15347
15447
  log_warn "Completion claim rejected: assumption ledger gate found unresolved high-severity spec assumption(s)."
15348
15448
  log_warn " Details under .loki/council/assumption-block.json ; opt out with LOKI_ASSUMPTION_GATE=0"
15349
15449
  # Fall through; keep iterating until high-sev assumptions resolve.
15450
+ # P1-3: semantic test-authenticity gate (OPT-IN, default OFF). Catches
15451
+ # fake tests that look real but verify nothing (literal-via-variable
15452
+ # echo etc.) that the regex gates 5+6 miss. ADVISORY-FIRST: the arm is
15453
+ # guarded by LOKI_GATE_SEMANTIC_TESTS=true, so by default it never runs
15454
+ # (zero runtime cost, never blocks). When enabled it runs the detector
15455
+ # with --block-high and rejects the completion ONLY on a CRITICAL/HIGH
15456
+ # finding; clean / no-test-files / detector-absent / timeout / malformed
15457
+ # all collapse to a pass inside _semantic_gate_and_surface, so the
15458
+ # autonomous loop can never deadlock on a clean run. Mirrors the
15459
+ # evidence / held-out / assumption arms above.
15460
+ elif [ "$_completion_claimed" = 1 ] && [ "${LOKI_GATE_SEMANTIC_TESTS:-false}" = "true" ] && type _semantic_gate_and_surface &>/dev/null && ! _semantic_gate_and_surface; then
15461
+ log_warn "Completion claim rejected: semantic test-authenticity gate found CRITICAL/HIGH fake-test problem(s)."
15462
+ log_warn " Details under .loki/quality/semantic-findings.txt ; opt-in gate -- disable with LOKI_GATE_SEMANTIC_TESTS=false"
15463
+ # Fall through; keep iterating until the fake tests are fixed.
15350
15464
  elif [ "$_completion_claimed" = 1 ]; then
15351
15465
  echo ""
15352
15466
  if [ -n "$COMPLETION_PROMISE" ]; then
@@ -7,7 +7,7 @@ Modules:
7
7
  control: Session control API (start/stop/pause/resume)
8
8
  """
9
9
 
10
- __version__ = "7.51.0"
10
+ __version__ = "7.53.0"
11
11
 
12
12
  # Expose the control app for easy import
13
13
  try:
@@ -3248,6 +3248,121 @@ async def get_audit_summary(days: int = 7):
3248
3248
  return audit.get_audit_summary(days=days)
3249
3249
 
3250
3250
 
3251
+ # Continuous compliance surface (P3-11).
3252
+ #
3253
+ # Exposes the agent audit chain's compliance posture as an always-available
3254
+ # live endpoint. There is NO background scheduler in this surface (that is
3255
+ # infra, out of scope): the report is regenerated from the CURRENT audit
3256
+ # state on every request, so the endpoint is "continuous" in the sense that
3257
+ # it always reflects live state -- never a stale cached snapshot.
3258
+ #
3259
+ # The report is produced by the authoritative Node compliance engine
3260
+ # (src/audit/index.js, the single source of truth for SOC2/ISO/GDPR control
3261
+ # mappings) via its `report` CLI shim, so the Python surface never
3262
+ # reimplements (and never drifts from) the mapping logic. The chain it reads
3263
+ # is the JS AGENT chain at <project>/.loki/audit/audit.jsonl -- a different
3264
+ # chain from the Python dashboard chain that /api/enterprise/audit serves
3265
+ # (the two are reconciled by the cross-link verifier, not merged), so this
3266
+ # endpoint deliberately does NOT gate on audit.is_audit_enabled() (that flag
3267
+ # governs the Python chain). When the agent chain has no entries the report
3268
+ # is returned honestly with totalAuditEntries == 0; no fabricated pass.
3269
+ _COMPLIANCE_TYPES = ("soc2", "iso27001", "gdpr")
3270
+
3271
+
3272
@app.get("/api/compliance", dependencies=[Depends(auth.require_scope("audit"))])
def get_compliance_status(report_type: str = Query("soc2", alias="type")):
    """Return the live compliance report for the active project's agent audit chain.

    Access requires the `audit` scope (enforced by the route dependency) and
    is subject to the read rate limiter.

    Query:
        type: one of soc2, iso27001, gdpr (default soc2).

    Returns:
        A dict with `available`, `reportType`, `projectDir` and `report`.
        When the report cannot be produced (Node missing, engine script
        missing, invocation failure, non-zero exit, non-JSON output) the
        payload has `available: False`, a `reason` string and `report: None`
        instead of an error status.

    Raises:
        HTTPException: 429 when the rate limiter rejects the call, 400 when
            `type` is not a supported report type.
    """
    if not _read_limiter.check("compliance"):
        raise HTTPException(status_code=429, detail="Rate limit exceeded")
    if report_type not in _COMPLIANCE_TYPES:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid type: {report_type}. Must be one of {list(_COMPLIANCE_TYPES)}",
        )

    import shutil

    # _get_loki_dir() yields the .loki directory; its parent is the project root
    # that is handed to the engine.
    project_dir = str(_get_loki_dir().parent.resolve())
    engine_script = _Path(__file__).resolve().parent.parent / "src" / "audit" / "index.js"

    def _unavailable(reason):
        # Shared shape for every "could not produce a report" outcome.
        return {
            "available": False,
            "reason": reason,
            "reportType": report_type,
            "projectDir": project_dir,
            "report": None,
        }

    node_exe = shutil.which("node")
    if node_exe is None:
        return _unavailable("Node runtime not found")
    if not engine_script.exists():
        return _unavailable(f"compliance engine not found at {engine_script}")

    try:
        completed = subprocess.run(
            [node_exe, str(engine_script), "report", report_type, project_dir],
            capture_output=True,
            text=True,
            timeout=30,
            check=False,
        )
    except (OSError, subprocess.SubprocessError) as exc:
        # subprocess.TimeoutExpired is a SubprocessError, so a hung engine lands here too.
        return _unavailable(f"compliance engine invocation failed: {exc}")

    if completed.returncode != 0:
        failure_text = completed.stderr or "compliance engine returned non-zero"
        return _unavailable(failure_text.strip()[:500])

    try:
        parsed_report = json.loads(completed.stdout.strip())
    except json.JSONDecodeError:
        return _unavailable("compliance engine produced non-JSON output")
    else:
        return {
            "available": True,
            "reportType": report_type,
            "projectDir": project_dir,
            "report": parsed_report,
        }
3364
+
3365
+
3251
3366
  # =============================================================================
3252
3367
  # File-based Session Endpoints (reads from .loki/ flat files)
3253
3368
  # =============================================================================
@@ -2,7 +2,7 @@
2
2
 
3
3
  The flagship product of [Autonomi](https://www.autonomi.dev/). Loki Mode is a spec-driven autonomous builder with a built-in trust layer that takes any spec to a deployed product and verifies completion with evidence (quality gates plus a completion council), not just a "done" claim. Complete installation instructions for all platforms and use cases.
4
4
 
5
- **Version:** v7.51.0
5
+ **Version:** v7.53.0
6
6
 
7
7
  ---
8
8
 
@@ -396,7 +396,7 @@ provider works inside the container. Provide auth with your Anthropic API key:
396
396
  # Run Loki Mode in Docker (Claude provider, API-key auth)
397
397
  docker run --rm -e ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \
398
398
  -v $(pwd):/workspace -w /workspace \
399
- asklokesh/loki-mode:7.51.0 start ./my-spec.md
399
+ asklokesh/loki-mode:7.53.0 start ./my-spec.md
400
400
  ```
401
401
 
402
402
  ##### docker compose + .env (no host install)
@@ -1,5 +1,5 @@
1
1
  // @bun
2
- var r6=Object.defineProperty;var t6=($)=>$;function i6($,Q){this[$]=t6.bind(null,Q)}var h=($,Q)=>{for(var Z in Q)r6($,Z,{get:Q[Z],enumerable:!0,configurable:!0,set:i6.bind(Q,Z)})};var L=($,Q)=>()=>($&&(Q=$($=0)),Q);var K$=import.meta.require;var D1={};h(D1,{lokiDir:()=>P,homeLokiDir:()=>n$,findRepoRootForVersion:()=>o$,REPO_ROOT:()=>g});import{resolve as n,dirname as d$}from"path";import{fileURLToPath as e6}from"url";import{existsSync as P$}from"fs";import{homedir as $Q}from"os";function QQ(){let $=S1;for(let Q=0;Q<6;Q++){if(P$(n($,"VERSION"))&&P$(n($,"autonomy/run.sh")))return $;let Z=d$($);if(Z===$)break;$=Z}return n(S1,"..","..","..")}function o$($){let Q=$;for(let Z=0;Z<6;Z++){if(P$(n(Q,"VERSION"))&&P$(n(Q,"autonomy/run.sh")))return Q;let z=d$(Q);if(z===Q)break;Q=z}return n($,"..","..","..")}function P(){return process.env.LOKI_DIR??n(process.cwd(),".loki")}function n$(){return n($Q(),".loki")}var S1,g;var b=L(()=>{S1=d$(e6(import.meta.url));g=QQ()});import{readFileSync as ZQ}from"fs";import{resolve as zQ,dirname as XQ}from"path";import{fileURLToPath as KQ}from"url";function j$(){if($$!==null)return $$;let $="7.51.0";if(typeof $==="string"&&$.length>0)return $$=$,$$;try{let Q=XQ(KQ(import.meta.url)),Z=o$(Q);$$=ZQ(zQ(Z,"VERSION"),"utf-8").trim()}catch{$$="unknown"}return $$}var $$=null;var a$=L(()=>{b()});var b1={};h(b1,{runOrThrow:()=>qQ,run:()=>k,commandVersion:()=>WQ,commandExists:()=>f,ShellError:()=>s$});async function k($,Q={}){let Z=Bun.spawn({cmd:[...$],stdout:"pipe",stderr:"pipe",env:Q.env?{...process.env,...Q.env}:process.env,cwd:Q.cwd}),z,X;if(Q.timeoutMs&&Q.timeoutMs>0)z=setTimeout(()=>{try{Z.kill("SIGTERM")}catch{}X=setTimeout(()=>{try{Z.kill("SIGKILL")}catch{}},2000)},Q.timeoutMs);try{let[q,K,W]=await Promise.all([new Response(Z.stdout).text(),new Response(Z.stderr).text(),Z.exited]);return{stdout:q,stderr:K,exitCode:W}}finally{if(z)clearTimeout(z);if(X)clearTimeout(X)}}async function qQ($,Q={}){let Z=await k($,Q);if(Z.exitCode!==0)throw new 
s$(`command failed (${Z.exitCode}): ${$.join(" ")}`,Z.exitCode,Z.stdout,Z.stderr);return Z}async function f($){let Q=VQ($),Z=await k(["sh","-c",`command -v ${Q}`],{timeoutMs:5000});if(Z.exitCode===0)return Z.stdout.trim()||null;return null}function VQ($){if(!/^[A-Za-z0-9._/-]+$/.test($))throw Error(`refused to shell-escape suspect token: ${$}`);return $}async function WQ($,Q="--version"){if(!await f($))return null;let z=await k([$,Q],{timeoutMs:5000});if(z.exitCode!==0)return null;return((z.stdout||z.stderr).split(/\r?\n/)[0]?.trim()??"")||null}var s$;var d=L(()=>{s$=class s$ extends Error{message;exitCode;stdout;stderr;constructor($,Q,Z,z){super($);this.message=$;this.exitCode=Q;this.stdout=Z;this.stderr=z;this.name="ShellError"}}});function a($){return JQ?"":$}var JQ,T,S,_,wZ,I,R,y,V;var c=L(()=>{JQ=(process.env.NO_COLOR??"").length>0;T=a("\x1B[0;31m"),S=a("\x1B[0;32m"),_=a("\x1B[1;33m"),wZ=a("\x1B[0;34m"),I=a("\x1B[0;36m"),R=a("\x1B[1m"),y=a("\x1B[2m"),V=a("\x1B[0m")});import{existsSync as wQ}from"fs";async function Q$(){if(G$!==void 0)return G$;let $="/opt/homebrew/bin/python3.12";if(wQ($))return G$=$,$;let Q=await f("python3.12");if(Q)return G$=Q,Q;let Z=await f("python3");return G$=Z,Z}async function Z$($,Q={}){let Z=await Q$();if(!Z)return{stdout:"",stderr:"python3 not found",exitCode:127};return k([Z,"-c",$],Q)}var G$;var q$=L(()=>{d()});var e1={};h(e1,{runStatus:()=>uQ});import{existsSync as v,readFileSync as W$,readdirSync as d1,statSync as o1}from"fs";import{resolve as C,basename as DQ}from"path";import{homedir as CQ}from"os";function n1($){let Q=Math.trunc($);if(Q>=1e6)return`${(Math.trunc(Q/1e6*10)/10).toFixed(1)}M`;if(Q>=1000)return`${(Math.trunc(Q/1000*10)/10).toFixed(1)}K`;return String(Q)}function a1($,Q,Z){if(Q===0)return null;let z=Math.trunc($*100/Q),X=Math.trunc($*k$/Q);if(X>k$)X=k$;let q=k$-X,K=S;if(z>=80)K=T;else if(z>=50)K=_;let W="=".repeat(Math.max(0,X))+" ".repeat(Math.max(0,q)),J=n1($),U=n1(Q);return` ${R}${Z}${V} ${K}[${W}]${V} ${z}% 
(${J} / ${U})`}async function hQ(){if(await f("jq"))return!0;return process.stdout.write(`${T}Error: jq is required but not installed.${V}
2
+ var r6=Object.defineProperty;var t6=($)=>$;function i6($,Q){this[$]=t6.bind(null,Q)}var h=($,Q)=>{for(var Z in Q)r6($,Z,{get:Q[Z],enumerable:!0,configurable:!0,set:i6.bind(Q,Z)})};var L=($,Q)=>()=>($&&(Q=$($=0)),Q);var K$=import.meta.require;var D1={};h(D1,{lokiDir:()=>P,homeLokiDir:()=>n$,findRepoRootForVersion:()=>o$,REPO_ROOT:()=>g});import{resolve as n,dirname as d$}from"path";import{fileURLToPath as e6}from"url";import{existsSync as P$}from"fs";import{homedir as $Q}from"os";function QQ(){let $=S1;for(let Q=0;Q<6;Q++){if(P$(n($,"VERSION"))&&P$(n($,"autonomy/run.sh")))return $;let Z=d$($);if(Z===$)break;$=Z}return n(S1,"..","..","..")}function o$($){let Q=$;for(let Z=0;Z<6;Z++){if(P$(n(Q,"VERSION"))&&P$(n(Q,"autonomy/run.sh")))return Q;let z=d$(Q);if(z===Q)break;Q=z}return n($,"..","..","..")}function P(){return process.env.LOKI_DIR??n(process.cwd(),".loki")}function n$(){return n($Q(),".loki")}var S1,g;var b=L(()=>{S1=d$(e6(import.meta.url));g=QQ()});import{readFileSync as ZQ}from"fs";import{resolve as zQ,dirname as XQ}from"path";import{fileURLToPath as KQ}from"url";function j$(){if($$!==null)return $$;let $="7.53.0";if(typeof $==="string"&&$.length>0)return $$=$,$$;try{let Q=XQ(KQ(import.meta.url)),Z=o$(Q);$$=ZQ(zQ(Z,"VERSION"),"utf-8").trim()}catch{$$="unknown"}return $$}var $$=null;var a$=L(()=>{b()});var b1={};h(b1,{runOrThrow:()=>qQ,run:()=>k,commandVersion:()=>WQ,commandExists:()=>f,ShellError:()=>s$});async function k($,Q={}){let Z=Bun.spawn({cmd:[...$],stdout:"pipe",stderr:"pipe",env:Q.env?{...process.env,...Q.env}:process.env,cwd:Q.cwd}),z,X;if(Q.timeoutMs&&Q.timeoutMs>0)z=setTimeout(()=>{try{Z.kill("SIGTERM")}catch{}X=setTimeout(()=>{try{Z.kill("SIGKILL")}catch{}},2000)},Q.timeoutMs);try{let[q,K,W]=await Promise.all([new Response(Z.stdout).text(),new Response(Z.stderr).text(),Z.exited]);return{stdout:q,stderr:K,exitCode:W}}finally{if(z)clearTimeout(z);if(X)clearTimeout(X)}}async function qQ($,Q={}){let Z=await k($,Q);if(Z.exitCode!==0)throw new 
s$(`command failed (${Z.exitCode}): ${$.join(" ")}`,Z.exitCode,Z.stdout,Z.stderr);return Z}async function f($){let Q=VQ($),Z=await k(["sh","-c",`command -v ${Q}`],{timeoutMs:5000});if(Z.exitCode===0)return Z.stdout.trim()||null;return null}function VQ($){if(!/^[A-Za-z0-9._/-]+$/.test($))throw Error(`refused to shell-escape suspect token: ${$}`);return $}async function WQ($,Q="--version"){if(!await f($))return null;let z=await k([$,Q],{timeoutMs:5000});if(z.exitCode!==0)return null;return((z.stdout||z.stderr).split(/\r?\n/)[0]?.trim()??"")||null}var s$;var d=L(()=>{s$=class s$ extends Error{message;exitCode;stdout;stderr;constructor($,Q,Z,z){super($);this.message=$;this.exitCode=Q;this.stdout=Z;this.stderr=z;this.name="ShellError"}}});function a($){return JQ?"":$}var JQ,T,S,_,wZ,I,R,y,V;var c=L(()=>{JQ=(process.env.NO_COLOR??"").length>0;T=a("\x1B[0;31m"),S=a("\x1B[0;32m"),_=a("\x1B[1;33m"),wZ=a("\x1B[0;34m"),I=a("\x1B[0;36m"),R=a("\x1B[1m"),y=a("\x1B[2m"),V=a("\x1B[0m")});import{existsSync as wQ}from"fs";async function Q$(){if(G$!==void 0)return G$;let $="/opt/homebrew/bin/python3.12";if(wQ($))return G$=$,$;let Q=await f("python3.12");if(Q)return G$=Q,Q;let Z=await f("python3");return G$=Z,Z}async function Z$($,Q={}){let Z=await Q$();if(!Z)return{stdout:"",stderr:"python3 not found",exitCode:127};return k([Z,"-c",$],Q)}var G$;var q$=L(()=>{d()});var e1={};h(e1,{runStatus:()=>uQ});import{existsSync as v,readFileSync as W$,readdirSync as d1,statSync as o1}from"fs";import{resolve as C,basename as DQ}from"path";import{homedir as CQ}from"os";function n1($){let Q=Math.trunc($);if(Q>=1e6)return`${(Math.trunc(Q/1e6*10)/10).toFixed(1)}M`;if(Q>=1000)return`${(Math.trunc(Q/1000*10)/10).toFixed(1)}K`;return String(Q)}function a1($,Q,Z){if(Q===0)return null;let z=Math.trunc($*100/Q),X=Math.trunc($*k$/Q);if(X>k$)X=k$;let q=k$-X,K=S;if(z>=80)K=T;else if(z>=50)K=_;let W="=".repeat(Math.max(0,X))+" ".repeat(Math.max(0,q)),J=n1($),U=n1(Q);return` ${R}${Z}${V} ${K}[${W}]${V} ${z}% 
(${J} / ${U})`}async function hQ(){if(await f("jq"))return!0;return process.stdout.write(`${T}Error: jq is required but not installed.${V}
3
3
  `),process.stdout.write(`Install with:
4
4
  `),process.stdout.write(` brew install jq (macOS)
5
5
  `),process.stdout.write(` apt install jq (Debian/Ubuntu)
@@ -790,4 +790,4 @@ Set LOKI_LEGACY_BASH=1 to force the bash CLI for every command.
790
790
  `),2}default:return process.stderr.write(`Unknown command: ${Q}
791
791
  `),process.stderr.write(s6),2}}l1();process.on("SIGINT",()=>process.exit(130));process.on("SIGTERM",()=>process.exit(143));var KZ=await XZ(Bun.argv.slice(2));process.exit(KZ);
792
792
 
793
- //# debugId=8015709BAB9E625464756E2164756E21
793
+ //# debugId=3BF6CF9B99A2BD7E64756E2164756E21
@@ -482,8 +482,10 @@ class DebateRunner:
482
482
  if provider == "claude":
483
483
  return ["claude", "--dangerously-skip-permissions", "-p", prompt]
484
484
  if provider == "codex":
485
- # Codex uses `exec --full-auto` with the prompt as positional.
486
- return ["codex", "exec", "--full-auto", prompt]
485
+ # Codex uses `exec --sandbox workspace-write` with the prompt as
486
+ # positional (codex 0.132.0 deprecated --full-auto; workspace-write
487
+ # is the documented replacement, exec is non-interactive by default).
488
+ return ["codex", "exec", "--sandbox", "workspace-write", prompt]
487
489
  if provider == "gemini":
488
490
  return ["gemini", "--approval-mode=yolo", prompt]
489
491
  if provider == "cline":
@@ -180,7 +180,7 @@ class ComponentGenerator:
180
180
  if provider == "claude":
181
181
  cmd = base_cmd + [binary, "-p", prompt]
182
182
  elif provider == "codex":
183
- cmd = base_cmd + [binary, "exec", "--full-auto", prompt]
183
+ cmd = base_cmd + [binary, "exec", "--sandbox", "workspace-write", prompt]
184
184
  elif provider == "gemini":
185
185
  cmd = base_cmd + [binary, "--approval-mode=yolo", prompt]
186
186
  elif provider == "cline":
package/mcp/__init__.py CHANGED
@@ -57,4 +57,4 @@ try:
57
57
  except ImportError:
58
58
  __all__ = ['mcp']
59
59
 
60
- __version__ = '7.51.0'
60
+ __version__ = '7.53.0'
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "loki-mode",
3
3
  "mcpName": "io.github.asklokesh/loki-mode",
4
- "version": "7.51.0",
4
+ "version": "7.53.0",
5
5
  "description": "Loki Mode by Autonomi. Autonomous spec-to-product system: takes a PRD, GitHub issue, OpenAPI/JSON/YAML, or one-line brief to a deployed app via the RARV-C closure loop with 8 quality gates. Provider-agnostic (Claude Code, OpenAI Codex, Cline, Aider).",
6
6
  "keywords": [
7
7
  "agent",
@@ -2,7 +2,7 @@
2
2
  "$schema": "https://json.schemastore.org/claude-code-plugin-manifest.json",
3
3
  "name": "loki-mode",
4
4
  "displayName": "Loki Mode",
5
- "version": "7.51.0",
5
+ "version": "7.53.0",
6
6
  "description": "Autonomous spec-to-product build system with a built-in trust layer (RARV-C closure loop, 8 quality gates, completion council). Ships Loki's spec-hardening, drift-detection, and deterministic PR verification commands plus the Loki MCP server.",
7
7
  "author": {
8
8
  "name": "Autonomi",
@@ -29,10 +29,14 @@ PROVIDER_CLI="codex"
29
29
 
30
30
  # CLI Invocation
31
31
  # Note: codex uses positional prompt after "exec" subcommand
32
- # VERIFIED: exec --full-auto confirmed in codex exec --help (v0.98.0)
33
- # --full-auto: sets --ask-for-approval on-request + --sandbox workspace-write (v0.98.0)
32
+ # VERIFIED: codex 0.132.0 deprecates --full-auto (prints a deprecation warning
33
+ # and the flag is gone from `codex exec --help`). Use --sandbox workspace-write,
34
+ # which is the documented replacement and the sandbox --full-auto expanded to.
35
+ # `codex exec` is the non-interactive subcommand: it runs at approval "never"
36
+ # with no --ask-for-approval flag, so --sandbox workspace-write alone keeps the
37
+ # loop fully autonomous (verified against codex 0.132.0: no approval prompt).
34
38
  # Alternative: "exec --dangerously-bypass-approvals-and-sandbox" (legacy, no sandbox)
35
- PROVIDER_AUTONOMOUS_FLAG="exec --full-auto --skip-git-repo-check"
39
+ PROVIDER_AUTONOMOUS_FLAG="exec --sandbox workspace-write --skip-git-repo-check"
36
40
  PROVIDER_PROMPT_FLAG=""
37
41
  PROVIDER_PROMPT_POSITIONAL=true
38
42
 
@@ -124,7 +128,7 @@ provider_version() {
124
128
  provider_invoke() {
125
129
  local prompt="$1"
126
130
  shift
127
- codex exec --full-auto --skip-git-repo-check \
131
+ codex exec --sandbox workspace-write --skip-git-repo-check \
128
132
  --model "$PROVIDER_MODEL_DEVELOPMENT" \
129
133
  "$prompt" "$@"
130
134
  }
@@ -182,11 +186,13 @@ resolve_model_for_tier() {
182
186
 
183
187
  # Tier-aware invocation.
184
188
  #
185
- # v7.4.18: aligned with codex CLI v0.125.0 (latest as of 2026-04-26).
186
- # Replaced --full-auto preset with the explicit flags it expands to:
187
- # --ask-for-approval never
188
- # --sandbox danger-full-access
189
- # Forward-compatible if the preset is renamed; readable in process listings.
189
+ # Aligned with codex CLI 0.132.0 (verified: --full-auto deprecated/removed
190
+ # from `codex exec --help`). `codex exec` is the non-interactive subcommand and
191
+ # runs at approval "never" with no --ask-for-approval flag, so --sandbox
192
+ # workspace-write alone keeps the loop autonomous (verified: no approval prompt
193
+ # on codex 0.132.0). workspace-write is the documented --full-auto replacement
194
+ # and the safer default (scoped disk writes) over danger-full-access; readable
195
+ # in process listings.
190
196
  #
191
197
  # Optional env knobs:
192
198
  # LOKI_CODEX_WEB_SEARCH=true enable codex --search (live web)
@@ -227,8 +233,7 @@ provider_invoke_with_tier() {
227
233
  LOKI_CODEX_REASONING_EFFORT="$effort" \
228
234
  CODEX_MODEL_REASONING_EFFORT="$effort" \
229
235
  codex exec \
230
- --ask-for-approval never \
231
- --sandbox danger-full-access \
236
+ --sandbox workspace-write \
232
237
  --skip-git-repo-check \
233
238
  --model "$model" \
234
239
  "${extra_flags[@]}" \
@@ -286,7 +286,7 @@ All CLI flags have been verified against actual CLI help output:
286
286
  | Provider | Flag | Verified Version | Notes |
287
287
  |----------|------|------------------|-------|
288
288
  | Claude | `--dangerously-skip-permissions` | v2.1.34 | Autonomous mode |
289
- | Codex | `--full-auto` | v0.98.0 | Recommended; legacy: `exec --dangerously-bypass-approvals-and-sandbox` |
289
+ | Codex | `--sandbox workspace-write` | v0.132.0 | Recommended (--full-auto deprecated 0.125+); legacy: `exec --dangerously-bypass-approvals-and-sandbox` |
290
290
  | Cline | `--auto-approve` | latest | Autonomous mode |
291
291
  | Aider | `--yes-always` | latest | Autonomous mode |
292
292
 
@@ -231,13 +231,16 @@ Claude models support an `effort` parameter that controls reasoning depth withou
231
231
 
232
232
  **Note:** The effort parameter and thinking prefixes serve different purposes. Effort controls the model's internal reasoning budget; thinking prefixes guide the structure of the response.
233
233
 
234
- ### Codex --full-auto Flag
234
+ ### Codex --sandbox workspace-write Flag
235
235
 
236
- Codex CLI v0.98.0 supports `--full-auto` as the recommended autonomous mode flag, replacing the verbose `exec --dangerously-bypass-approvals-and-sandbox` invocation:
236
+ Codex CLI deprecated `--full-auto` in v0.125+ (removed from `codex exec --help`,
237
+ emits a deprecation warning if used). The documented replacement is
238
+ `--sandbox workspace-write`. The `exec` subcommand is non-interactive by default
239
+ (approval: never), so the sandbox flag alone keeps the loop autonomous:
237
240
 
238
241
  ```bash
239
- # Recommended (v0.98.0+)
240
- codex --full-auto "$prompt"
242
+ # Recommended (codex 0.125+)
243
+ codex exec --sandbox workspace-write "$prompt"
241
244
 
242
245
  # Legacy (still supported)
243
246
  codex exec --dangerously-bypass-approvals-and-sandbox "$prompt"
@@ -6,7 +6,7 @@ Loki Mode supports four AI providers for autonomous execution.
6
6
 
7
7
  > **CLI Flags Verified:** The autonomous mode flags have been verified against actual CLI help output:
8
8
  > - Claude: `--dangerously-skip-permissions` (verified)
9
- > - Codex: `exec --full-auto --skip-git-repo-check` (the harness invocation; --skip-git-repo-check required on fresh non-git dirs) or `exec --dangerously-bypass-approvals-and-sandbox` (legacy)
9
+ > - Codex: `exec --sandbox workspace-write --skip-git-repo-check` (the harness invocation; --skip-git-repo-check required on fresh non-git dirs; --full-auto deprecated in codex 0.125+, workspace-write is the documented replacement) or `exec --dangerously-bypass-approvals-and-sandbox` (legacy)
10
10
 
11
11
  | Feature | Claude Code | OpenAI Codex | Cline CLI | Aider |
12
12
  |---------|-------------|--------------|-----------|-------|
@@ -70,7 +70,7 @@ Task(model="haiku", ...) # Fast tier (parallelize)
70
70
  **Invocation:**
71
71
  ```bash
72
72
  # Recommended (codex 0.125+)
73
- codex exec --full-auto --skip-git-repo-check "$prompt"
73
+ codex exec --sandbox workspace-write --skip-git-repo-check "$prompt"
74
74
 
75
75
  # Legacy (still supported)
76
76
  codex exec --dangerously-bypass-approvals-and-sandbox "$prompt"
@@ -2,12 +2,14 @@
2
2
 
3
3
  **Never ship code without passing all quality gates.**
4
4
 
5
- ## The 8 Quality Gates
5
+ ## The Quality Gates (8 default-on + 1 opt-in)
6
6
 
7
- Every gate below is wired into the orchestration loop (`autonomy/run.sh`) and
8
- blocks completion when it fails. The table lists exactly what each gate detects,
9
- what it does NOT detect (so you never over-trust a green gate), its opt-out flag,
10
- and its blocking behavior. Transcribe this list verbatim; do not recompute it.
7
+ Every gate below is wired into the orchestration loop (`autonomy/run.sh`). The 8
8
+ numbered gates are default-on and block completion when they fail; the opt-in
9
+ gate (marked below) is default-OFF and runs only when its flag is set. The table
10
+ lists exactly what each gate detects, what it does NOT detect (so you never
11
+ over-trust a green gate), its opt-out (or opt-in) flag, and its blocking behavior. Transcribe
12
+ this list verbatim; do not recompute it.
11
13
 
12
14
  | # | Gate | Detects | Does NOT detect | Blocking | Opt-out flag |
13
15
  |---|------|---------|-----------------|----------|--------------|
@@ -19,6 +21,7 @@ and its blocking behavior. Transcribe this list verbatim; do not recompute it.
19
21
  | 6 | Test Mutation Detector | Assertion-value churn alongside implementation changes (test-fitting), low assertion density (`tests/detect-test-mutations.sh`); HIGH blocks | Logically-correct-but-weak assertions | Yes (HIGH blocks) | `LOKI_GATE_MUTATION=false` |
20
22
  | 7 | Documentation Coverage | README presence, docs freshness within 10 commits, API docs for exported symbols in packages | Whether the docs are accurate or useful | Yes | `LOKI_GATE_DOC_COVERAGE=false` |
21
23
  | 8 | Magic Modules Debate | Spec-vs-implementation debate findings on generated Magic Modules; BLOCK-severity findings block | Issues outside the Magic Modules debate scope | Yes (BLOCK severity) | `LOKI_GATE_MAGIC_DEBATE=false` |
24
+ | 9 (opt-in, default OFF) | Semantic Test-Authenticity | Fake tests that look real but verify nothing (literal-via-variable echo, mock-return echo, deleted assertions) that gates 5+6 miss (`tests/detect-semantic-test-problems.sh --block-high`); CRITICAL/HIGH block | Deep dataflow, legitimate computed-literal assertions, Python/shell tests (JS/TS only); MED/LOW are advisory | Only when enabled, and only on CRITICAL/HIGH; runs solely on a completion claim | Opt-IN: `LOKI_GATE_SEMANTIC_TESTS=true` to enable (default off = not invoked, never blocks) |
22
25
 
23
26
  **Severity-based blocking** ties the review gates together: any Critical or High
24
27
  finding blocks completion. Medium, Low, and cosmetic findings are advisory and
@@ -83,6 +83,84 @@ function exportReport(type, opts) {
83
83
  return compliance.exportReportJson(report);
84
84
  }
85
85
 
86
+ /**
87
+ * Generate a compliance report as a plain object, with the agent-chain
88
+ * tamper-evidence verdict folded in.
89
+ *
90
+ * This is the object form intended for surfaces (e.g. the dashboard
91
+ * /api/compliance endpoint) that need the report as data rather than a
92
+ * pre-serialized string. It always reflects the REAL audit chain:
93
+ *
94
+ * - The report body is generated from the live audit entries
95
+ * (`_log.readEntries()`), never fabricated.
96
+ * - `chainIntegrity` is populated from `verifyChain()` so the report
97
+ * carries the true tamper-evidence state of the underlying chain.
98
+ * For the SOC2 report this fills the `chainIntegrity: null` slot the
99
+ * generator leaves for the caller; for the other report types it is
100
+ * attached under the same key for a uniform surface contract.
101
+ *
102
+ * When the chain has no entries the report is still returned honestly
103
+ * with `totalAuditEntries: 0` (an empty-but-valid report), never a
104
+ * fabricated "compliant" verdict.
105
+ *
106
+ * @param {string} type - 'soc2', 'iso27001', or 'gdpr'
107
+ * @param {object} [opts] - Report options (projectName, period, etc.)
108
+ * @returns {object} The compliance report object with chainIntegrity set.
109
+ */
110
+ function getReport(type, opts) {
111
+ if (!_initialized) init();
112
+ var report = generateReport(type, opts);
113
+ // Fold the real tamper-evidence verdict into the report. Do not let a
114
+ // verification error fabricate a pass: capture it honestly instead.
115
+ try {
116
+ report.chainIntegrity = _log.verifyChain();
117
+ } catch (e) {
118
+ report.chainIntegrity = {
119
+ valid: false,
120
+ entries: report.totalAuditEntries || 0,
121
+ brokenAt: null,
122
+ error: 'chain verification failed: ' + String((e && e.message) || e),
123
+ };
124
+ }
125
+ return report;
126
+ }
127
+
128
+ /**
129
+ * CLI shim so a non-Node surface (e.g. the Python dashboard) can fetch a
130
+ * compliance report for a given project directory as JSON on stdout.
131
+ *
132
+ * This is the inverse of dashboard/audit.py's `_unified_cli()`
133
+ * (which lets the Node-side unified verifier read the Python chain).
134
+ *
135
+ * Invoked as:
136
+ * node src/audit/index.js report <type> <projectDir>
137
+ *
138
+ * <type> is one of soc2 | iso27001 | gdpr. <projectDir> is the project
139
+ * root whose .loki/audit/audit.jsonl chain is read. Prints a single JSON
140
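+ * object to stdout (a usage error is also reported on stdout, as a JSON object with an `error` key). When <projectDir> is omitted, the current working directory is used. Returns exit 0 on success, 2 on usage error.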
141
+ *
142
+ * The report is generated from the REAL chain; an absent/empty chain
143
+ * yields an honest empty report (totalAuditEntries: 0), not a fake pass.
144
+ */
145
+ function _cli(argv) {
146
+ var args = argv || [];
147
+ var VALID_TYPES = { soc2: true, iso27001: true, gdpr: true };
148
+ if (args.length < 2 || args[0] !== 'report' || !VALID_TYPES[args[1]]) {
149
+ process.stdout.write(JSON.stringify({
150
+ error: 'usage: index.js report {soc2|iso27001|gdpr} <projectDir>',
151
+ }) + '\n');
152
+ return 2;
153
+ }
154
+ var type = args[1];
155
+ var projectDir = args[2] || process.cwd();
156
+ destroy();
157
+ init(projectDir);
158
+ var report = getReport(type);
159
+ destroy();
160
+ process.stdout.write(JSON.stringify(report) + '\n');
161
+ return 0;
162
+ }
163
+
86
164
  /**
87
165
  * Check if a provider is allowed by data residency policy.
88
166
  */
@@ -167,6 +245,7 @@ module.exports = {
167
245
  verifyChain: verifyChain,
168
246
  generateReport: generateReport,
169
247
  exportReport: exportReport,
248
+ getReport: getReport,
170
249
  checkProvider: checkProvider,
171
250
  isAirGapped: isAirGapped,
172
251
  readEntries: readEntries,
@@ -177,3 +256,8 @@ module.exports = {
177
256
  verifyUnified: verifyUnified,
178
257
  writeWitness: writeWitness,
179
258
  };
259
+
260
+ // CLI entry point: `node src/audit/index.js report <type> <projectDir>`.
261
+ if (require.main === module) {
262
+ process.exit(_cli(process.argv.slice(2)));
263
+ }