loki-mode 7.51.0 → 7.53.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SKILL.md +2 -2
- package/VERSION +1 -1
- package/autonomy/completion-council.sh +2 -2
- package/autonomy/grill.sh +1 -1
- package/autonomy/lib/claude-flags.sh +15 -11
- package/autonomy/lib/wiki_llm.py +1 -1
- package/autonomy/loki +10 -10
- package/autonomy/prd-checklist.sh +18 -8
- package/autonomy/run.sh +121 -7
- package/dashboard/__init__.py +1 -1
- package/dashboard/server.py +115 -0
- package/docs/INSTALLATION.md +2 -2
- package/loki-ts/dist/loki.js +2 -2
- package/magic/core/debate.py +4 -2
- package/magic/core/generator.py +1 -1
- package/mcp/__init__.py +1 -1
- package/package.json +1 -1
- package/plugins/loki-mode/.claude-plugin/plugin.json +1 -1
- package/providers/codex.sh +16 -11
- package/references/multi-provider.md +1 -1
- package/skills/model-selection.md +7 -4
- package/skills/providers.md +2 -2
- package/skills/quality-gates.md +8 -5
- package/src/audit/index.js +84 -0
package/SKILL.md
CHANGED
|
@@ -3,7 +3,7 @@ name: loki-mode
|
|
|
3
3
|
description: Autonomous spec-driven build system with a built-in trust layer. It does not call work done until it is verified (RARV-C closure loop, 8 quality gates, completion council, verified-completion evidence gate). Triggers on "Loki Mode". Takes a spec (PRD, GitHub issue, OpenAPI doc, etc.) to deployed product with minimal human intervention. Provider-agnostic. Requires --dangerously-skip-permissions flag.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
|
-
# Loki Mode v7.51.0
|
|
6
|
+
# Loki Mode v7.53.0
|
|
7
7
|
|
|
8
8
|
**You are an autonomous agent. You make decisions. You do not ask questions. You do not stop.**
|
|
9
9
|
|
|
@@ -407,4 +407,4 @@ See `CHANGELOG.md` entries [7.5.7], [7.5.8], [7.5.13] for the per-fix list and r
|
|
|
407
407
|
|
|
408
408
|
---
|
|
409
409
|
|
|
410
|
-
**v7.51.0 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
|
|
410
|
+
**v7.53.0 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
|
package/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
7.51.0
|
|
1
|
+
7.53.0
|
|
package/autonomy/completion-council.sh
CHANGED
|
@@ -2038,7 +2038,7 @@ ISSUES: CRITICAL:description (optional, one per line per issue)"
|
|
|
2038
2038
|
;;
|
|
2039
2039
|
codex)
|
|
2040
2040
|
if command -v codex &>/dev/null; then
|
|
2041
|
-
verdict=$(codex exec --
|
|
2041
|
+
verdict=$(codex exec --sandbox workspace-write "$prompt" 2>/dev/null)
|
|
2042
2042
|
fi
|
|
2043
2043
|
;;
|
|
2044
2044
|
gemini)
|
|
@@ -2139,7 +2139,7 @@ REASON: your reasoning"
|
|
|
2139
2139
|
;;
|
|
2140
2140
|
codex)
|
|
2141
2141
|
if command -v codex &>/dev/null; then
|
|
2142
|
-
verdict=$(codex exec --
|
|
2142
|
+
verdict=$(codex exec --sandbox workspace-write "$prompt" 2>/dev/null)
|
|
2143
2143
|
fi
|
|
2144
2144
|
;;
|
|
2145
2145
|
gemini)
|
package/autonomy/grill.sh
CHANGED
|
@@ -227,7 +227,7 @@ grill_invoke_provider() {
|
|
|
227
227
|
return $GRILL_EXIT_ERROR
|
|
228
228
|
fi
|
|
229
229
|
local out
|
|
230
|
-
out="$(printf '%s' "$prompt" | _grill_with_timeout "${LOKI_GRILL_TIMEOUT:-180}" codex exec --
|
|
230
|
+
out="$(printf '%s' "$prompt" | _grill_with_timeout "${LOKI_GRILL_TIMEOUT:-180}" codex exec --sandbox workspace-write - 2>/dev/null)"
|
|
231
231
|
if [ -z "$out" ]; then
|
|
232
232
|
_grill_err "provider returned no output"
|
|
233
233
|
return $GRILL_EXIT_ERROR
|
|
package/autonomy/lib/claude-flags.sh
CHANGED
|
@@ -63,31 +63,35 @@ loki_remaining_budget() {
|
|
|
63
63
|
local budget_file="${TARGET_DIR:-.}/.loki/metrics/budget.json"
|
|
64
64
|
local spend="0"
|
|
65
65
|
if [ -f "$budget_file" ]; then
|
|
66
|
-
|
|
67
|
-
|
|
66
|
+
# Pass the path via env var (os.environ), NOT string interpolation, so a
|
|
67
|
+
# path containing a single quote (or other python/shell-breaking char)
|
|
68
|
+
# cannot break the parse. Single-quoted program -> bash interpolates nothing.
|
|
69
|
+
spend=$(_LOKI_BUDGET_FILE="$budget_file" python3 -c '
|
|
70
|
+
import json, os, sys
|
|
68
71
|
try:
|
|
69
|
-
with open(
|
|
72
|
+
with open(os.environ["_LOKI_BUDGET_FILE"]) as f:
|
|
70
73
|
d = json.load(f)
|
|
71
|
-
v = d.get(
|
|
74
|
+
v = d.get("current_spend", 0)
|
|
72
75
|
print(float(v))
|
|
73
76
|
except Exception:
|
|
74
77
|
print(0)
|
|
75
|
-
|
|
78
|
+
' 2>/dev/null)
|
|
76
79
|
fi
|
|
77
80
|
# Compute remaining via python3 (bash floats are unreliable across awk/bc variations).
|
|
78
|
-
|
|
79
|
-
|
|
81
|
+
# Pass limit/spend via env vars too (same hardening; single-quoted program).
|
|
82
|
+
_LOKI_BUDGET_LIMIT="$limit" _LOKI_BUDGET_SPEND="$spend" python3 -c '
|
|
83
|
+
import os, sys
|
|
80
84
|
try:
|
|
81
|
-
limit = float(
|
|
82
|
-
spend = float(
|
|
85
|
+
limit = float(os.environ["_LOKI_BUDGET_LIMIT"])
|
|
86
|
+
spend = float(os.environ["_LOKI_BUDGET_SPEND"])
|
|
83
87
|
rem = limit - spend
|
|
84
88
|
# Strictly positive; otherwise emit nothing (caller decides whether to bail or warn).
|
|
85
89
|
if rem > 0:
|
|
86
90
|
# Round to 2 decimal places for the CLI.
|
|
87
|
-
print(f
|
|
91
|
+
print(f"{rem:.2f}")
|
|
88
92
|
except Exception:
|
|
89
93
|
pass
|
|
90
|
-
|
|
94
|
+
' 2>/dev/null
|
|
91
95
|
}
|
|
92
96
|
|
|
93
97
|
# ---------- Fallback model ----------
|
package/autonomy/lib/wiki_llm.py
CHANGED
|
@@ -57,7 +57,7 @@ def invoke_llm(prompt, timeout=120):
|
|
|
57
57
|
|
|
58
58
|
cmds = {
|
|
59
59
|
"claude": ["claude", "-p", prompt],
|
|
60
|
-
"codex": ["codex", "exec", "--
|
|
60
|
+
"codex": ["codex", "exec", "--sandbox", "workspace-write", prompt],
|
|
61
61
|
"cline": ["cline", "-y", prompt],
|
|
62
62
|
"aider": ["aider", "--message", prompt, "--yes-always", "--no-auto-commits"],
|
|
63
63
|
}
|
package/autonomy/loki
CHANGED
|
@@ -743,7 +743,7 @@ show_help() {
|
|
|
743
743
|
echo " --complex Force complex complexity tier (8 phases)"
|
|
744
744
|
echo " --github Enable GitHub issue import"
|
|
745
745
|
echo " --no-dashboard Disable web dashboard"
|
|
746
|
-
echo " --sandbox Run in Docker sandbox for isolation"
|
|
746
|
+
echo " --sandbox Run in Docker sandbox for isolation (default: off; requires Docker)"
|
|
747
747
|
echo " --skip-memory Skip loading memory context at startup"
|
|
748
748
|
echo " --fresh-prd Regenerate the PRD from the codebase (no-PRD runs; ignores the reusable generated PRD)"
|
|
749
749
|
echo " --compliance PRESET Enable compliance mode (default|healthcare|fintech|government)"
|
|
@@ -1063,7 +1063,7 @@ cmd_start() {
|
|
|
1063
1063
|
echo " --github Enable GitHub issue import"
|
|
1064
1064
|
echo " --no-dashboard Disable web dashboard"
|
|
1065
1065
|
echo " --api Start dashboard API server alongside the build"
|
|
1066
|
-
echo " --sandbox Run in Docker sandbox"
|
|
1066
|
+
echo " --sandbox Run in Docker sandbox (default: off; requires Docker)"
|
|
1067
1067
|
echo " --skip-memory Skip loading memory context at startup"
|
|
1068
1068
|
echo " --fresh-prd Regenerate the PRD from the codebase on a no-PRD run"
|
|
1069
1069
|
echo " (ignores the reusable generated PRD; aliases: --regen-prd,"
|
|
@@ -3785,7 +3785,7 @@ cmd_provider_info() {
|
|
|
3785
3785
|
echo "Name: Codex CLI"
|
|
3786
3786
|
echo "Vendor: OpenAI"
|
|
3787
3787
|
echo "CLI: codex"
|
|
3788
|
-
echo "Flag: --
|
|
3788
|
+
echo "Flag: --sandbox workspace-write"
|
|
3789
3789
|
echo ""
|
|
3790
3790
|
echo "Features:"
|
|
3791
3791
|
echo " - Autonomous mode"
|
|
@@ -5680,7 +5680,7 @@ cmd_run() {
|
|
|
5680
5680
|
echo " --simple Force simple complexity tier"
|
|
5681
5681
|
echo " --complex Force complex complexity tier"
|
|
5682
5682
|
echo " --no-dashboard Disable web dashboard"
|
|
5683
|
-
echo " --sandbox Run in Docker sandbox"
|
|
5683
|
+
echo " --sandbox Run in Docker sandbox (default: off; requires Docker)"
|
|
5684
5684
|
echo " --no-plan Skip auto-shown PRD analysis at startup"
|
|
5685
5685
|
echo " --budget USD Set cost budget limit"
|
|
5686
5686
|
echo ""
|
|
@@ -11641,7 +11641,7 @@ except Exception: pass
|
|
|
11641
11641
|
done; } && phase_exit=0 || phase_exit=$?
|
|
11642
11642
|
;;
|
|
11643
11643
|
codex)
|
|
11644
|
-
(cd "$codebase_path" && codex exec --
|
|
11644
|
+
(cd "$codebase_path" && codex exec --sandbox workspace-write "$phase_prompt" 2>&1) || phase_exit=$?
|
|
11645
11645
|
;;
|
|
11646
11646
|
cline)
|
|
11647
11647
|
(cd "$codebase_path" && cline -y "$phase_prompt" 2>&1) || phase_exit=$?
|
|
@@ -11814,7 +11814,7 @@ except Exception: pass
|
|
|
11814
11814
|
done; } && doc_exit=0 || doc_exit=$?
|
|
11815
11815
|
;;
|
|
11816
11816
|
codex)
|
|
11817
|
-
(cd "$codebase_path" && codex exec --
|
|
11817
|
+
(cd "$codebase_path" && codex exec --sandbox workspace-write "$doc_prompt" 2>&1) || doc_exit=$?
|
|
11818
11818
|
;;
|
|
11819
11819
|
cline)
|
|
11820
11820
|
(cd "$codebase_path" && cline -y "$doc_prompt" 2>&1) || doc_exit=$?
|
|
@@ -12445,7 +12445,7 @@ except Exception: pass
|
|
|
12445
12445
|
done && heal_exit=0 || heal_exit=$?
|
|
12446
12446
|
;;
|
|
12447
12447
|
codex)
|
|
12448
|
-
(cd "$codebase_path" && codex exec --
|
|
12448
|
+
(cd "$codebase_path" && codex exec --sandbox workspace-write "$heal_prompt" 2>&1) || heal_exit=$?
|
|
12449
12449
|
;;
|
|
12450
12450
|
cline)
|
|
12451
12451
|
(cd "$codebase_path" && cline -y "$heal_prompt" 2>&1) || heal_exit=$?
|
|
@@ -22069,7 +22069,7 @@ USER TASK: ${prompt}"
|
|
|
22069
22069
|
claude -p "$full_prompt" 2>&1 || agent_exit=$?
|
|
22070
22070
|
;;
|
|
22071
22071
|
codex)
|
|
22072
|
-
codex exec --
|
|
22072
|
+
codex exec --sandbox workspace-write "$full_prompt" 2>&1 || agent_exit=$?
|
|
22073
22073
|
;;
|
|
22074
22074
|
cline)
|
|
22075
22075
|
cline -y "$full_prompt" 2>&1 || agent_exit=$?
|
|
@@ -22200,7 +22200,7 @@ $diff"
|
|
|
22200
22200
|
|
|
22201
22201
|
case "$provider" in
|
|
22202
22202
|
claude) claude -p "$review_prompt" 2>&1 ;;
|
|
22203
|
-
codex) codex exec --
|
|
22203
|
+
codex) codex exec --sandbox workspace-write "$review_prompt" 2>&1 ;;
|
|
22204
22204
|
cline) cline -y "$review_prompt" 2>&1 ;;
|
|
22205
22205
|
*) echo -e "${RED}Unknown provider: $provider${NC}"; return 1 ;;
|
|
22206
22206
|
esac
|
|
@@ -23870,7 +23870,7 @@ _docs_invoke_provider() {
|
|
|
23870
23870
|
result=$($t_prefix env CAVEMAN_DEFAULT_MODE=off claude -p "$prompt" 2>/dev/null) || exit_code=$?
|
|
23871
23871
|
;;
|
|
23872
23872
|
codex)
|
|
23873
|
-
result=$($t_prefix codex exec --
|
|
23873
|
+
result=$($t_prefix codex exec --sandbox workspace-write "$prompt" 2>/dev/null) || exit_code=$?
|
|
23874
23874
|
;;
|
|
23875
23875
|
cline)
|
|
23876
23876
|
result=$($t_prefix cline -y "$prompt" 2>/dev/null) || exit_code=$?
|
|
package/autonomy/prd-checklist.sh
CHANGED
|
@@ -422,7 +422,11 @@ checklist_should_verify() {
|
|
|
422
422
|
# non-cooperative agent with filesystem tools can read the reservation directly.
|
|
423
423
|
#
|
|
424
424
|
# Selection is idempotent and reproducible: count = clamp(round(0.25*N), 1, 5)
|
|
425
|
-
# for N>=4 items; ordering by sha256 of each item's "id" (stable, not random).
|
|
425
|
+
# for N>=2 items; ordering by sha256 of each item's "id" (stable, not random).
|
|
426
|
+
# Small checklists (2 <= N < 4) reserve exactly 1 held-out item via the same
|
|
427
|
+
# sha256-rank selection (the clamp floor of 1 guarantees coverage), so a small
|
|
428
|
+
# spec's checklist is never fully gameable. N<2 is a no-op: holding out the only
|
|
429
|
+
# item of a 1-item checklist would leave nothing to verify against in the loop.
|
|
426
430
|
# Written once to .loki/checklist/held-out.json; never overwritten if present.
|
|
427
431
|
checklist_select_heldout() {
|
|
428
432
|
local heldout_file="${CHECKLIST_DIR:-".loki/checklist"}/held-out.json"
|
|
@@ -442,7 +446,7 @@ checklist_select_heldout() {
|
|
|
442
446
|
# PARTIAL kept=k dropped=d - some prior ids survived; we keep only survivors
|
|
443
447
|
# DUP_SKIP - current checklist ids are not unique; the id-based
|
|
444
448
|
# mechanism is unsound, so we reserve nothing (MEDIUM-2)
|
|
445
|
-
# NOOP - n<
|
|
449
|
+
# NOOP - n<2 with no prior file, or other no-write outcome
|
|
446
450
|
# Honest caveat: re-selection or partial-survival after a regen can reserve
|
|
447
451
|
# items the build loop already saw in earlier prompts (the hidden-from-loop
|
|
448
452
|
# guarantee is best-effort once the checklist ids change mid-run).
|
|
@@ -512,7 +516,7 @@ if os.path.exists(out_path):
|
|
|
512
516
|
|
|
513
517
|
if prior is not None:
|
|
514
518
|
prior_ids = [i for i in prior.get('held_out', []) if i]
|
|
515
|
-
# A prior reservation of [] (e.g. an earlier n<
|
|
519
|
+
# A prior reservation of [] (e.g. an earlier n<2 run) is a valid no-op state;
|
|
516
520
|
# keep it idempotent rather than re-selecting now that n may have grown.
|
|
517
521
|
if not prior_ids:
|
|
518
522
|
print('IDEMPOTENT')
|
|
@@ -525,9 +529,11 @@ if prior is not None:
|
|
|
525
529
|
if not survivors:
|
|
526
530
|
# Fully stale: the checklist regenerated and orphaned the reservation.
|
|
527
531
|
# Deterministically re-select from the CURRENT checklist.
|
|
528
|
-
if n < 4:
|
|
532
|
+
if n < 2:
|
|
533
|
+
# N<2: cannot hold out from a 1-item checklist (reserving the only
|
|
534
|
+
# item leaves nothing to verify against). No-op write of an empty set.
|
|
529
535
|
atomic_write({'held_out': [], 'total_items': n,
|
|
530
|
-
'note': 'n<
|
|
536
|
+
'note': 'n<2: no held-out reserved (re-selected after stale reservation)'})
|
|
531
537
|
print('RESELECTED 0')
|
|
532
538
|
sys.exit(0)
|
|
533
539
|
held = fresh_selection()
|
|
@@ -542,11 +548,15 @@ if prior is not None:
|
|
|
542
548
|
sys.exit(0)
|
|
543
549
|
|
|
544
550
|
# No prior reservation: first selection.
|
|
545
|
-
if n < 4:
|
|
546
|
-
# N
|
|
547
|
-
|
|
551
|
+
if n < 2:
|
|
552
|
+
# N<2 gate: a 1-item (or empty) checklist cannot meaningfully hold out an
|
|
553
|
+
# item -- reserving the only item would leave nothing to verify against in
|
|
554
|
+
# the build loop. Write an empty set so downstream reads stay well-formed.
|
|
555
|
+
atomic_write({'held_out': [], 'total_items': n, 'note': 'n<2: no held-out reserved'})
|
|
548
556
|
print('NOOP')
|
|
549
557
|
sys.exit(0)
|
|
558
|
+
# For 2 <= N < 4, fresh_selection() reserves exactly 1 item (select_count clamps
|
|
559
|
+
# round(0.25*N) up to a floor of 1), so small specs are never fully gameable.
|
|
550
560
|
|
|
551
561
|
held = fresh_selection()
|
|
552
562
|
atomic_write({'held_out': held, 'total_items': n})
|
package/autonomy/run.sh
CHANGED
|
@@ -585,7 +585,7 @@ BACKGROUND_MODE=${LOKI_BACKGROUND:-false} # Run in background
|
|
|
585
585
|
STAGED_AUTONOMY=${LOKI_STAGED_AUTONOMY:-false} # Require plan approval
|
|
586
586
|
AUDIT_LOG_ENABLED=${LOKI_AUDIT_LOG:-true} # Enable audit logging (on by default)
|
|
587
587
|
MAX_PARALLEL_AGENTS=${LOKI_MAX_PARALLEL_AGENTS:-10} # Limit concurrent agents
|
|
588
|
-
SANDBOX_MODE=${LOKI_SANDBOX_MODE:-false} # Docker sandbox mode
|
|
588
|
+
SANDBOX_MODE=${LOKI_SANDBOX_MODE:-false} # Docker sandbox mode (informational; the real dispatch reads LOKI_SANDBOX_MODE at autonomy/loki:1965 and execs sandbox.sh -- this var is not consumed in run.sh)
|
|
589
589
|
ALLOWED_PATHS=${LOKI_ALLOWED_PATHS:-""} # Empty = all paths allowed
|
|
590
590
|
BLOCKED_COMMANDS=${LOKI_BLOCKED_COMMANDS:-"rm -rf /,dd if=,mkfs,:(){ :|:& };:"}
|
|
591
591
|
|
|
@@ -3264,7 +3264,7 @@ spawn_worktree_session() {
|
|
|
3264
3264
|
fi
|
|
3265
3265
|
;;
|
|
3266
3266
|
codex)
|
|
3267
|
-
codex exec --
|
|
3267
|
+
codex exec --sandbox workspace-write --skip-git-repo-check \
|
|
3268
3268
|
"Loki Mode: $task_prompt. Read .loki/CONTINUITY.md for context." \
|
|
3269
3269
|
>> "$log_file" 2>&1 || _wt_exit=$?
|
|
3270
3270
|
;;
|
|
@@ -3480,7 +3480,7 @@ Output ONLY the resolved file content with no conflict markers. No explanations.
|
|
|
3480
3480
|
resolution=$(CAVEMAN_DEFAULT_MODE=off claude "${_cr_argv[@]}" -p "$conflict_prompt" --output-format text 2>/dev/null)
|
|
3481
3481
|
;;
|
|
3482
3482
|
codex)
|
|
3483
|
-
resolution=$(codex exec --
|
|
3483
|
+
resolution=$(codex exec --sandbox workspace-write --skip-git-repo-check "$conflict_prompt" 2>/dev/null)
|
|
3484
3484
|
;;
|
|
3485
3485
|
cline)
|
|
3486
3486
|
resolution=$(invoke_cline_capture "$conflict_prompt" 2>/dev/null)
|
|
@@ -6199,7 +6199,7 @@ check_command_allowed() {
|
|
|
6199
6199
|
# run.sh does not directly execute arbitrary shell commands from user or agent
|
|
6200
6200
|
# input. Command execution is handled by the AI CLI's own permission model:
|
|
6201
6201
|
# - Claude Code: --dangerously-skip-permissions (with its own allowlist)
|
|
6202
|
-
# - Codex CLI: --
|
|
6202
|
+
# - Codex CLI: exec --sandbox workspace-write or exec --dangerously-bypass-approvals-and-sandbox
|
|
6203
6203
|
#
|
|
6204
6204
|
# HUMAN_INPUT.md content is injected as a text prompt to the AI agent (not
|
|
6205
6205
|
# executed as a shell command), and is already guarded by:
|
|
@@ -8313,6 +8313,89 @@ enforce_mutation_integrity() {
|
|
|
8313
8313
|
return 0
|
|
8314
8314
|
}
|
|
8315
8315
|
|
|
8316
|
+
# ============================================================================
|
|
8317
|
+
# Semantic Test-Authenticity Gate (P1-3): wire tests/detect-semantic-test-problems.sh
|
|
8318
|
+
# as an OPT-IN completion gate. The detector catches the harder class of fake
|
|
8319
|
+
# tests that the regex detectors (gates 5+6) miss: assertions that look real but
|
|
8320
|
+
# verify nothing because the asserted value never flows through code under test
|
|
8321
|
+
# (literal-via-variable echo HIGH, mock-return echo MED, deleted assertions MED).
|
|
8322
|
+
#
|
|
8323
|
+
# ADVISORY-FIRST POSTURE (no-deadlock contract): this helper is invoked ONLY when
|
|
8324
|
+
# LOKI_GATE_SEMANTIC_TESTS=true (the elif guard at the completion-promise arm
|
|
8325
|
+
# short-circuits when off, so there is zero runtime cost on the default path).
|
|
8326
|
+
# When on, it runs the detector with --block-high (clean exit-code contract:
|
|
8327
|
+
# rc 2 iff a CRITICAL/HIGH finding exists). We surface ALL severities to a
|
|
8328
|
+
# findings file (advisory) and return nonzero ONLY on rc 2. Every other exit --
|
|
8329
|
+
# rc 0 (clean), rc 124 (timeout), detector absent, no test files, malformed
|
|
8330
|
+
# output -- returns 0 (pass/fall-through), so the autonomous loop can NEVER
|
|
8331
|
+
# deadlock on a clean run. Mirrors enforce_mock_integrity's invocation
|
|
8332
|
+
# (cd TARGET_DIR + LOKI_SCAN_DIR=TARGET_DIR + timeout), swapping --strict for
|
|
8333
|
+
# --block-high and deciding on the rc-2 contract instead of grepping stdout.
|
|
8334
|
+
# ============================================================================
|
|
8335
|
+
enforce_semantic_integrity() {
|
|
8336
|
+
local loki_dir="${TARGET_DIR:-.}/.loki"
|
|
8337
|
+
local quality_dir="$loki_dir/quality"
|
|
8338
|
+
mkdir -p "$quality_dir"
|
|
8339
|
+
local findings_file="$quality_dir/semantic-findings.txt"
|
|
8340
|
+
local detector="$SCRIPT_DIR/../tests/detect-semantic-test-problems.sh"
|
|
8341
|
+
local gate_timeout="${LOKI_GATE_TIMEOUT:-300}"
|
|
8342
|
+
|
|
8343
|
+
if [ ! -f "$detector" ]; then
|
|
8344
|
+
log_info "Semantic test gate: detector not found, skipping (inconclusive)"
|
|
8345
|
+
rm -f "$findings_file" 2>/dev/null || true
|
|
8346
|
+
return 0
|
|
8347
|
+
fi
|
|
8348
|
+
|
|
8349
|
+
local output rc
|
|
8350
|
+
# --block-high exits 2 iff CRITICAL/HIGH present; 0 otherwise (clean wrapper).
|
|
8351
|
+
output=$(cd "${TARGET_DIR:-.}" && LOKI_SCAN_DIR="${TARGET_DIR:-.}" \
|
|
8352
|
+
timeout "$gate_timeout" bash "$detector" --block-high 2>&1)
|
|
8353
|
+
rc=$?
|
|
8354
|
+
|
|
8355
|
+
# timeout exit 124 -- inconclusive, never block on a hang (deny-filter)
|
|
8356
|
+
if [ "$rc" -eq 124 ]; then
|
|
8357
|
+
log_warn "Semantic test gate: detector timed out after ${gate_timeout}s -- inconclusive"
|
|
8358
|
+
rm -f "$findings_file" 2>/dev/null || true
|
|
8359
|
+
return 0
|
|
8360
|
+
fi
|
|
8361
|
+
|
|
8362
|
+
if [ "$rc" -eq 2 ]; then
|
|
8363
|
+
# rc 2 == one or more CRITICAL/HIGH findings. Persist per-finding text.
|
|
8364
|
+
{
|
|
8365
|
+
echo "# Semantic test-authenticity findings (CRITICAL/HIGH block this completion)"
|
|
8366
|
+
echo "$output" | grep -E '\[(CRITICAL|HIGH|MEDIUM|LOW)\]' || true
|
|
8367
|
+
} > "$findings_file"
|
|
8368
|
+
log_warn "Semantic test gate: CRITICAL/HIGH fake-test problems detected -- BLOCK"
|
|
8369
|
+
return 1
|
|
8370
|
+
fi
|
|
8371
|
+
|
|
8372
|
+
# rc 0 (and any other non-2, non-124 code, e.g. a malformed run) -> PASS.
|
|
8373
|
+
# Route any MED/LOW advisory findings to the injection file, else clear it.
|
|
8374
|
+
local med_low
|
|
8375
|
+
med_low=$(echo "$output" | grep -E '\[(MEDIUM|LOW)\]' || true)
|
|
8376
|
+
if [ -n "$med_low" ]; then
|
|
8377
|
+
{
|
|
8378
|
+
echo "# Semantic test advisory findings (MED/LOW, non-blocking)"
|
|
8379
|
+
echo "$med_low"
|
|
8380
|
+
} > "$findings_file"
|
|
8381
|
+
else
|
|
8382
|
+
rm -f "$findings_file" 2>/dev/null || true
|
|
8383
|
+
fi
|
|
8384
|
+
log_info "Semantic test gate: PASS"
|
|
8385
|
+
return 0
|
|
8386
|
+
}
|
|
8387
|
+
|
|
8388
|
+
# P1-3 wrapper that runs the semantic gate and returns its exact rc, mirroring
|
|
8389
|
+
# _evidence_gate_and_surface so the completion-promise elif arm reads cleanly
|
|
8390
|
+
# (`! _semantic_gate_and_surface`). Returns nonzero ONLY when enforce_semantic_integrity
|
|
8391
|
+
# saw an rc-2 (CRITICAL/HIGH) result; all deny-filter cases already collapse to 0
|
|
8392
|
+
# inside enforce_semantic_integrity, so this never blocks a clean run.
|
|
8393
|
+
_semantic_gate_and_surface() {
|
|
8394
|
+
local _rc=0
|
|
8395
|
+
enforce_semantic_integrity || _rc=$?
|
|
8396
|
+
return "$_rc"
|
|
8397
|
+
}
|
|
8398
|
+
|
|
8316
8399
|
# ============================================================================
|
|
8317
8400
|
# 3-Reviewer Parallel Code Review (v5.35.0)
|
|
8318
8401
|
# Specialist pool from skills/quality-gates.md with blind review
|
|
@@ -8637,7 +8720,7 @@ _dispatch_reviewer() {
|
|
|
8637
8720
|
--output-format text > "$review_output" 2>/dev/null
|
|
8638
8721
|
;;
|
|
8639
8722
|
codex)
|
|
8640
|
-
codex exec --
|
|
8723
|
+
codex exec --sandbox workspace-write --skip-git-repo-check "$prompt_text" \
|
|
8641
8724
|
> "$review_output" 2>/dev/null
|
|
8642
8725
|
;;
|
|
8643
8726
|
cline)
|
|
@@ -9361,7 +9444,7 @@ ADVERSARIAL_EOF
|
|
|
9361
9444
|
;;
|
|
9362
9445
|
codex)
|
|
9363
9446
|
if command -v codex &>/dev/null; then
|
|
9364
|
-
codex exec --
|
|
9447
|
+
codex exec --sandbox workspace-write --skip-git-repo-check "$adversarial_prompt" \
|
|
9365
9448
|
> "$result_file" 2>/dev/null || true
|
|
9366
9449
|
fi
|
|
9367
9450
|
;;
|
|
@@ -12248,6 +12331,23 @@ if d.get('blocked'):
|
|
|
12248
12331
|
gate_failure_context="${gate_failure_context}FIX THESE ISSUES BEFORE PROCEEDING WITH NEW WORK."
|
|
12249
12332
|
fi
|
|
12250
12333
|
|
|
12334
|
+
# P1-3: surface specific semantic test-authenticity findings (which fake test,
|
|
12335
|
+
# which line) when the opt-in gate (LOKI_GATE_SEMANTIC_TESTS) wrote them, so a
|
|
12336
|
+
# block converges: the agent gets the exact files/lines to fix rather than a
|
|
12337
|
+
# bare gate name. The file exists only when the gate ran AND found something
|
|
12338
|
+
# (cleared on clean), so this is zero-cost on the default path and when off.
|
|
12339
|
+
# Mirrors the static-analysis/test-results detail-surfacing above. Surfaced
|
|
12340
|
+
# whether the run blocked (CRIT/HIGH) or only advised (MED/LOW): both inform
|
|
12341
|
+
# the next iteration. Independent of gate-failures.txt presence (the
|
|
12342
|
+
# completion-promise arm does not append a gate token).
|
|
12343
|
+
if [ -f "${TARGET_DIR:-.}/.loki/quality/semantic-findings.txt" ]; then
|
|
12344
|
+
local sem_findings
|
|
12345
|
+
sem_findings=$(grep -E '\[(CRITICAL|HIGH|MEDIUM|LOW)\]' "${TARGET_DIR:-.}/.loki/quality/semantic-findings.txt" 2>/dev/null | head -20 || true)
|
|
12346
|
+
if [ -n "$sem_findings" ]; then
|
|
12347
|
+
gate_failure_context="${gate_failure_context} SEMANTIC TEST-AUTHENTICITY FINDINGS (fix the fake tests; an assertion must verify a value that flows through the code under test, not echo a literal back): ${sem_findings}"
|
|
12348
|
+
fi
|
|
12349
|
+
fi
|
|
12350
|
+
|
|
12251
12351
|
# P2-2: high-severity spec-assumption context. When DISCOVERY recorded any
|
|
12252
12352
|
# high-severity assumption (the spec was ambiguous in a high-impact place),
|
|
12253
12353
|
# surface it to the build agent so it implements with the gap in view (or
|
|
@@ -14717,7 +14817,7 @@ if __name__ == "__main__":
|
|
|
14717
14817
|
# Uses dynamic tier from RARV phase (tier_param already set above)
|
|
14718
14818
|
{ LOKI_CODEX_REASONING_EFFORT="$tier_param" \
|
|
14719
14819
|
CODEX_MODEL_REASONING_EFFORT="$tier_param" \
|
|
14720
|
-
codex exec --
|
|
14820
|
+
codex exec --sandbox workspace-write --skip-git-repo-check \
|
|
14721
14821
|
"$prompt" 2>&1 | tee -a "$log_file" "$agent_log" "$iter_output"; \
|
|
14722
14822
|
} && exit_code=0 || exit_code=$?
|
|
14723
14823
|
;;
|
|
@@ -15347,6 +15447,20 @@ else:
|
|
|
15347
15447
|
log_warn "Completion claim rejected: assumption ledger gate found unresolved high-severity spec assumption(s)."
|
|
15348
15448
|
log_warn " Details under .loki/council/assumption-block.json ; opt out with LOKI_ASSUMPTION_GATE=0"
|
|
15349
15449
|
# Fall through; keep iterating until high-sev assumptions resolve.
|
|
15450
|
+
# P1-3: semantic test-authenticity gate (OPT-IN, default OFF). Catches
|
|
15451
|
+
# fake tests that look real but verify nothing (literal-via-variable
|
|
15452
|
+
# echo etc.) that the regex gates 5+6 miss. ADVISORY-FIRST: the arm is
|
|
15453
|
+
# guarded by LOKI_GATE_SEMANTIC_TESTS=true, so by default it never runs
|
|
15454
|
+
# (zero runtime cost, never blocks). When enabled it runs the detector
|
|
15455
|
+
# with --block-high and rejects the completion ONLY on a CRITICAL/HIGH
|
|
15456
|
+
# finding; clean / no-test-files / detector-absent / timeout / malformed
|
|
15457
|
+
# all collapse to a pass inside _semantic_gate_and_surface, so the
|
|
15458
|
+
# autonomous loop can never deadlock on a clean run. Mirrors the
|
|
15459
|
+
# evidence / held-out / assumption arms above.
|
|
15460
|
+
elif [ "$_completion_claimed" = 1 ] && [ "${LOKI_GATE_SEMANTIC_TESTS:-false}" = "true" ] && type _semantic_gate_and_surface &>/dev/null && ! _semantic_gate_and_surface; then
|
|
15461
|
+
log_warn "Completion claim rejected: semantic test-authenticity gate found CRITICAL/HIGH fake-test problem(s)."
|
|
15462
|
+
log_warn " Details under .loki/quality/semantic-findings.txt ; opt-in gate -- disable with LOKI_GATE_SEMANTIC_TESTS=false"
|
|
15463
|
+
# Fall through; keep iterating until the fake tests are fixed.
|
|
15350
15464
|
elif [ "$_completion_claimed" = 1 ]; then
|
|
15351
15465
|
echo ""
|
|
15352
15466
|
if [ -n "$COMPLETION_PROMISE" ]; then
|
package/dashboard/__init__.py
CHANGED
package/dashboard/server.py
CHANGED
|
@@ -3248,6 +3248,121 @@ async def get_audit_summary(days: int = 7):
|
|
|
3248
3248
|
return audit.get_audit_summary(days=days)
|
|
3249
3249
|
|
|
3250
3250
|
|
|
3251
|
+
# Continuous compliance surface (P3-11).
|
|
3252
|
+
#
|
|
3253
|
+
# Exposes the agent audit chain's compliance posture as an always-available
|
|
3254
|
+
# live endpoint. There is NO background scheduler in this surface (that is
|
|
3255
|
+
# infra, out of scope): the report is regenerated from the CURRENT audit
|
|
3256
|
+
# state on every request, so the endpoint is "continuous" in the sense that
|
|
3257
|
+
# it always reflects live state -- never a stale cached snapshot.
|
|
3258
|
+
#
|
|
3259
|
+
# The report is produced by the authoritative Node compliance engine
|
|
3260
|
+
# (src/audit/index.js, the single source of truth for SOC2/ISO/GDPR control
|
|
3261
|
+
# mappings) via its `report` CLI shim, so the Python surface never
|
|
3262
|
+
# reimplements (and never drifts from) the mapping logic. The chain it reads
|
|
3263
|
+
# is the JS AGENT chain at <project>/.loki/audit/audit.jsonl -- a different
|
|
3264
|
+
# chain from the Python dashboard chain that /api/enterprise/audit serves
|
|
3265
|
+
# (the two are reconciled by the cross-link verifier, not merged), so this
|
|
3266
|
+
# endpoint deliberately does NOT gate on audit.is_audit_enabled() (that flag
|
|
3267
|
+
# governs the Python chain). When the agent chain has no entries the report
|
|
3268
|
+
# is returned honestly with totalAuditEntries == 0; no fabricated pass.
|
|
3269
|
+
_COMPLIANCE_TYPES = ("soc2", "iso27001", "gdpr")
|
|
3270
|
+
|
|
3271
|
+
|
|
3272
|
+
@app.get("/api/compliance", dependencies=[Depends(auth.require_scope("audit"))])
|
|
3273
|
+
def get_compliance_status(report_type: str = Query("soc2", alias="type")):
|
|
3274
|
+
"""Live compliance status for the active project's agent audit chain.
|
|
3275
|
+
|
|
3276
|
+
Auth/tenant scoping: requires the `audit` scope (same gate as the
|
|
3277
|
+
/api/enterprise/audit family). The data is filesystem state scoped to
|
|
3278
|
+
the active project via _get_loki_dir(), exactly like the other
|
|
3279
|
+
.loki-backed read endpoints; there is no DB tenant_id on a JSONL file
|
|
3280
|
+
to enforce against.
|
|
3281
|
+
|
|
3282
|
+
Query: ?type=soc2|iso27001|gdpr (default soc2).
|
|
3283
|
+
|
|
3284
|
+
Returns the compliance report JSON regenerated from CURRENT audit
|
|
3285
|
+
state on every call. If no audit data has been recorded the report is
|
|
3286
|
+
honestly empty (totalAuditEntries == 0), not a fabricated compliant
|
|
3287
|
+
verdict. If the Node engine is unavailable, returns an honest
|
|
3288
|
+
available:false payload (HTTP 200) rather than masquerading as "no
|
|
3289
|
+
compliance".
|
|
3290
|
+
"""
|
|
3291
|
+
if not _read_limiter.check("compliance"):
|
|
3292
|
+
raise HTTPException(status_code=429, detail="Rate limit exceeded")
|
|
3293
|
+
if report_type not in _COMPLIANCE_TYPES:
|
|
3294
|
+
raise HTTPException(
|
|
3295
|
+
status_code=400,
|
|
3296
|
+
detail=f"Invalid type: {report_type}. Must be one of {list(_COMPLIANCE_TYPES)}",
|
|
3297
|
+
)
|
|
3298
|
+
|
|
3299
|
+
import shutil
|
|
3300
|
+
|
|
3301
|
+
# The agent audit chain lives under <project>/.loki/audit; _get_loki_dir()
|
|
3302
|
+
# returns the .loki dir, so the project root is its parent.
|
|
3303
|
+
project_dir = str(_get_loki_dir().parent.resolve())
|
|
3304
|
+
repo_root = _Path(__file__).resolve().parent.parent
|
|
3305
|
+
index_js = repo_root / "src" / "audit" / "index.js"
|
|
3306
|
+
|
|
3307
|
+
node_bin = shutil.which("node")
|
|
3308
|
+
if node_bin is None or not index_js.exists():
|
|
3309
|
+
return {
|
|
3310
|
+
"available": False,
|
|
3311
|
+
"reason": (
|
|
3312
|
+
"Node runtime not found"
|
|
3313
|
+
if node_bin is None
|
|
3314
|
+
else f"compliance engine not found at {index_js}"
|
|
3315
|
+
),
|
|
3316
|
+
"reportType": report_type,
|
|
3317
|
+
"projectDir": project_dir,
|
|
3318
|
+
"report": None,
|
|
3319
|
+
}
|
|
3320
|
+
|
|
3321
|
+
try:
|
|
3322
|
+
proc = subprocess.run(
|
|
3323
|
+
[node_bin, str(index_js), "report", report_type, project_dir],
|
|
3324
|
+
capture_output=True,
|
|
3325
|
+
text=True,
|
|
3326
|
+
timeout=30,
|
|
3327
|
+
check=False,
|
|
3328
|
+
)
|
|
3329
|
+
except (OSError, subprocess.SubprocessError) as exc:
|
|
3330
|
+
return {
|
|
3331
|
+
"available": False,
|
|
3332
|
+
"reason": f"compliance engine invocation failed: {exc}",
|
|
3333
|
+
"reportType": report_type,
|
|
3334
|
+
"projectDir": project_dir,
|
|
3335
|
+
"report": None,
|
|
3336
|
+
}
|
|
3337
|
+
|
|
3338
|
+
if proc.returncode != 0:
|
|
3339
|
+
return {
|
|
3340
|
+
"available": False,
|
|
3341
|
+
"reason": (proc.stderr or "compliance engine returned non-zero").strip()[:500],
|
|
3342
|
+
"reportType": report_type,
|
|
3343
|
+
"projectDir": project_dir,
|
|
3344
|
+
"report": None,
|
|
3345
|
+
}
|
|
3346
|
+
|
|
3347
|
+
try:
|
|
3348
|
+
report = json.loads(proc.stdout.strip())
|
|
3349
|
+
except json.JSONDecodeError:
|
|
3350
|
+
return {
|
|
3351
|
+
"available": False,
|
|
3352
|
+
"reason": "compliance engine produced non-JSON output",
|
|
3353
|
+
"reportType": report_type,
|
|
3354
|
+
"projectDir": project_dir,
|
|
3355
|
+
"report": None,
|
|
3356
|
+
}
|
|
3357
|
+
|
|
3358
|
+
return {
|
|
3359
|
+
"available": True,
|
|
3360
|
+
"reportType": report_type,
|
|
3361
|
+
"projectDir": project_dir,
|
|
3362
|
+
"report": report,
|
|
3363
|
+
}
|
|
3364
|
+
|
|
3365
|
+
|
|
3251
3366
|
# =============================================================================
|
|
3252
3367
|
# File-based Session Endpoints (reads from .loki/ flat files)
|
|
3253
3368
|
# =============================================================================
|
package/docs/INSTALLATION.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
The flagship product of [Autonomi](https://www.autonomi.dev/). Loki Mode is a spec-driven autonomous builder with a built-in trust layer that takes any spec to a deployed product and verifies completion with evidence (quality gates plus a completion council), not just a "done" claim. Complete installation instructions for all platforms and use cases.
|
|
4
4
|
|
|
5
|
-
**Version:** v7.51.0
|
|
5
|
+
**Version:** v7.53.0
|
|
6
6
|
|
|
7
7
|
---
|
|
8
8
|
|
|
@@ -396,7 +396,7 @@ provider works inside the container. Provide auth with your Anthropic API key:
|
|
|
396
396
|
# Run Loki Mode in Docker (Claude provider, API-key auth)
|
|
397
397
|
docker run --rm -e ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \
|
|
398
398
|
-v $(pwd):/workspace -w /workspace \
|
|
399
|
-
asklokesh/loki-mode:7.51.0 start ./my-spec.md
|
|
399
|
+
asklokesh/loki-mode:7.53.0 start ./my-spec.md
|
|
400
400
|
```
|
|
401
401
|
|
|
402
402
|
##### docker compose + .env (no host install)
|
package/loki-ts/dist/loki.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// @bun
|
|
2
|
-
var r6=Object.defineProperty;var t6=($)=>$;function i6($,Q){this[$]=t6.bind(null,Q)}var h=($,Q)=>{for(var Z in Q)r6($,Z,{get:Q[Z],enumerable:!0,configurable:!0,set:i6.bind(Q,Z)})};var L=($,Q)=>()=>($&&(Q=$($=0)),Q);var K$=import.meta.require;var D1={};h(D1,{lokiDir:()=>P,homeLokiDir:()=>n$,findRepoRootForVersion:()=>o$,REPO_ROOT:()=>g});import{resolve as n,dirname as d$}from"path";import{fileURLToPath as e6}from"url";import{existsSync as P$}from"fs";import{homedir as $Q}from"os";function QQ(){let $=S1;for(let Q=0;Q<6;Q++){if(P$(n($,"VERSION"))&&P$(n($,"autonomy/run.sh")))return $;let Z=d$($);if(Z===$)break;$=Z}return n(S1,"..","..","..")}function o$($){let Q=$;for(let Z=0;Z<6;Z++){if(P$(n(Q,"VERSION"))&&P$(n(Q,"autonomy/run.sh")))return Q;let z=d$(Q);if(z===Q)break;Q=z}return n($,"..","..","..")}function P(){return process.env.LOKI_DIR??n(process.cwd(),".loki")}function n$(){return n($Q(),".loki")}var S1,g;var b=L(()=>{S1=d$(e6(import.meta.url));g=QQ()});import{readFileSync as ZQ}from"fs";import{resolve as zQ,dirname as XQ}from"path";import{fileURLToPath as KQ}from"url";function j$(){if($$!==null)return $$;let $="7.
|
|
2
|
+
var r6=Object.defineProperty;var t6=($)=>$;function i6($,Q){this[$]=t6.bind(null,Q)}var h=($,Q)=>{for(var Z in Q)r6($,Z,{get:Q[Z],enumerable:!0,configurable:!0,set:i6.bind(Q,Z)})};var L=($,Q)=>()=>($&&(Q=$($=0)),Q);var K$=import.meta.require;var D1={};h(D1,{lokiDir:()=>P,homeLokiDir:()=>n$,findRepoRootForVersion:()=>o$,REPO_ROOT:()=>g});import{resolve as n,dirname as d$}from"path";import{fileURLToPath as e6}from"url";import{existsSync as P$}from"fs";import{homedir as $Q}from"os";function QQ(){let $=S1;for(let Q=0;Q<6;Q++){if(P$(n($,"VERSION"))&&P$(n($,"autonomy/run.sh")))return $;let Z=d$($);if(Z===$)break;$=Z}return n(S1,"..","..","..")}function o$($){let Q=$;for(let Z=0;Z<6;Z++){if(P$(n(Q,"VERSION"))&&P$(n(Q,"autonomy/run.sh")))return Q;let z=d$(Q);if(z===Q)break;Q=z}return n($,"..","..","..")}function P(){return process.env.LOKI_DIR??n(process.cwd(),".loki")}function n$(){return n($Q(),".loki")}var S1,g;var b=L(()=>{S1=d$(e6(import.meta.url));g=QQ()});import{readFileSync as ZQ}from"fs";import{resolve as zQ,dirname as XQ}from"path";import{fileURLToPath as KQ}from"url";function j$(){if($$!==null)return $$;let $="7.53.0";if(typeof $==="string"&&$.length>0)return $$=$,$$;try{let Q=XQ(KQ(import.meta.url)),Z=o$(Q);$$=ZQ(zQ(Z,"VERSION"),"utf-8").trim()}catch{$$="unknown"}return $$}var $$=null;var a$=L(()=>{b()});var b1={};h(b1,{runOrThrow:()=>qQ,run:()=>k,commandVersion:()=>WQ,commandExists:()=>f,ShellError:()=>s$});async function k($,Q={}){let Z=Bun.spawn({cmd:[...$],stdout:"pipe",stderr:"pipe",env:Q.env?{...process.env,...Q.env}:process.env,cwd:Q.cwd}),z,X;if(Q.timeoutMs&&Q.timeoutMs>0)z=setTimeout(()=>{try{Z.kill("SIGTERM")}catch{}X=setTimeout(()=>{try{Z.kill("SIGKILL")}catch{}},2000)},Q.timeoutMs);try{let[q,K,W]=await Promise.all([new Response(Z.stdout).text(),new Response(Z.stderr).text(),Z.exited]);return{stdout:q,stderr:K,exitCode:W}}finally{if(z)clearTimeout(z);if(X)clearTimeout(X)}}async function qQ($,Q={}){let Z=await k($,Q);if(Z.exitCode!==0)throw new 
s$(`command failed (${Z.exitCode}): ${$.join(" ")}`,Z.exitCode,Z.stdout,Z.stderr);return Z}async function f($){let Q=VQ($),Z=await k(["sh","-c",`command -v ${Q}`],{timeoutMs:5000});if(Z.exitCode===0)return Z.stdout.trim()||null;return null}function VQ($){if(!/^[A-Za-z0-9._/-]+$/.test($))throw Error(`refused to shell-escape suspect token: ${$}`);return $}async function WQ($,Q="--version"){if(!await f($))return null;let z=await k([$,Q],{timeoutMs:5000});if(z.exitCode!==0)return null;return((z.stdout||z.stderr).split(/\r?\n/)[0]?.trim()??"")||null}var s$;var d=L(()=>{s$=class s$ extends Error{message;exitCode;stdout;stderr;constructor($,Q,Z,z){super($);this.message=$;this.exitCode=Q;this.stdout=Z;this.stderr=z;this.name="ShellError"}}});function a($){return JQ?"":$}var JQ,T,S,_,wZ,I,R,y,V;var c=L(()=>{JQ=(process.env.NO_COLOR??"").length>0;T=a("\x1B[0;31m"),S=a("\x1B[0;32m"),_=a("\x1B[1;33m"),wZ=a("\x1B[0;34m"),I=a("\x1B[0;36m"),R=a("\x1B[1m"),y=a("\x1B[2m"),V=a("\x1B[0m")});import{existsSync as wQ}from"fs";async function Q$(){if(G$!==void 0)return G$;let $="/opt/homebrew/bin/python3.12";if(wQ($))return G$=$,$;let Q=await f("python3.12");if(Q)return G$=Q,Q;let Z=await f("python3");return G$=Z,Z}async function Z$($,Q={}){let Z=await Q$();if(!Z)return{stdout:"",stderr:"python3 not found",exitCode:127};return k([Z,"-c",$],Q)}var G$;var q$=L(()=>{d()});var e1={};h(e1,{runStatus:()=>uQ});import{existsSync as v,readFileSync as W$,readdirSync as d1,statSync as o1}from"fs";import{resolve as C,basename as DQ}from"path";import{homedir as CQ}from"os";function n1($){let Q=Math.trunc($);if(Q>=1e6)return`${(Math.trunc(Q/1e6*10)/10).toFixed(1)}M`;if(Q>=1000)return`${(Math.trunc(Q/1000*10)/10).toFixed(1)}K`;return String(Q)}function a1($,Q,Z){if(Q===0)return null;let z=Math.trunc($*100/Q),X=Math.trunc($*k$/Q);if(X>k$)X=k$;let q=k$-X,K=S;if(z>=80)K=T;else if(z>=50)K=_;let W="=".repeat(Math.max(0,X))+" ".repeat(Math.max(0,q)),J=n1($),U=n1(Q);return` ${R}${Z}${V} ${K}[${W}]${V} ${z}% 
(${J} / ${U})`}async function hQ(){if(await f("jq"))return!0;return process.stdout.write(`${T}Error: jq is required but not installed.${V}
|
|
3
3
|
`),process.stdout.write(`Install with:
|
|
4
4
|
`),process.stdout.write(` brew install jq (macOS)
|
|
5
5
|
`),process.stdout.write(` apt install jq (Debian/Ubuntu)
|
|
@@ -790,4 +790,4 @@ Set LOKI_LEGACY_BASH=1 to force the bash CLI for every command.
|
|
|
790
790
|
`),2}default:return process.stderr.write(`Unknown command: ${Q}
|
|
791
791
|
`),process.stderr.write(s6),2}}l1();process.on("SIGINT",()=>process.exit(130));process.on("SIGTERM",()=>process.exit(143));var KZ=await XZ(Bun.argv.slice(2));process.exit(KZ);
|
|
792
792
|
|
|
793
|
-
//# debugId=
|
|
793
|
+
//# debugId=3BF6CF9B99A2BD7E64756E2164756E21
|
package/magic/core/debate.py
CHANGED
|
@@ -482,8 +482,10 @@ class DebateRunner:
|
|
|
482
482
|
if provider == "claude":
|
|
483
483
|
return ["claude", "--dangerously-skip-permissions", "-p", prompt]
|
|
484
484
|
if provider == "codex":
|
|
485
|
-
# Codex uses `exec --
|
|
486
|
-
|
|
485
|
+
# Codex uses `exec --sandbox workspace-write` with the prompt as
|
|
486
|
+
# positional (codex 0.132.0 deprecated --full-auto; workspace-write
|
|
487
|
+
# is the documented replacement, exec is non-interactive by default).
|
|
488
|
+
return ["codex", "exec", "--sandbox", "workspace-write", prompt]
|
|
487
489
|
if provider == "gemini":
|
|
488
490
|
return ["gemini", "--approval-mode=yolo", prompt]
|
|
489
491
|
if provider == "cline":
|
package/magic/core/generator.py
CHANGED
|
@@ -180,7 +180,7 @@ class ComponentGenerator:
|
|
|
180
180
|
if provider == "claude":
|
|
181
181
|
cmd = base_cmd + [binary, "-p", prompt]
|
|
182
182
|
elif provider == "codex":
|
|
183
|
-
cmd = base_cmd + [binary, "exec", "--
|
|
183
|
+
cmd = base_cmd + [binary, "exec", "--sandbox", "workspace-write", prompt]
|
|
184
184
|
elif provider == "gemini":
|
|
185
185
|
cmd = base_cmd + [binary, "--approval-mode=yolo", prompt]
|
|
186
186
|
elif provider == "cline":
|
package/mcp/__init__.py
CHANGED
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "loki-mode",
|
|
3
3
|
"mcpName": "io.github.asklokesh/loki-mode",
|
|
4
|
-
"version": "7.51.0",
|
|
4
|
+
"version": "7.53.0",
|
|
5
5
|
"description": "Loki Mode by Autonomi. Autonomous spec-to-product system: takes a PRD, GitHub issue, OpenAPI/JSON/YAML, or one-line brief to a deployed app via the RARV-C closure loop with 8 quality gates. Provider-agnostic (Claude Code, OpenAI Codex, Cline, Aider).",
|
|
6
6
|
"keywords": [
|
|
7
7
|
"agent",
|
|
package/plugins/loki-mode/.claude-plugin/plugin.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"$schema": "https://json.schemastore.org/claude-code-plugin-manifest.json",
|
|
3
3
|
"name": "loki-mode",
|
|
4
4
|
"displayName": "Loki Mode",
|
|
5
|
-
"version": "7.51.0",
|
|
5
|
+
"version": "7.53.0",
|
|
6
6
|
"description": "Autonomous spec-to-product build system with a built-in trust layer (RARV-C closure loop, 8 quality gates, completion council). Ships Loki's spec-hardening, drift-detection, and deterministic PR verification commands plus the Loki MCP server.",
|
|
7
7
|
"author": {
|
|
8
8
|
"name": "Autonomi",
|
package/providers/codex.sh
CHANGED
|
@@ -29,10 +29,14 @@ PROVIDER_CLI="codex"
|
|
|
29
29
|
|
|
30
30
|
# CLI Invocation
|
|
31
31
|
# Note: codex uses positional prompt after "exec" subcommand
|
|
32
|
-
# VERIFIED:
|
|
33
|
-
#
|
|
32
|
+
# VERIFIED: codex 0.132.0 deprecates --full-auto (prints a deprecation warning
|
|
33
|
+
# and the flag is gone from `codex exec --help`). Use --sandbox workspace-write,
|
|
34
|
+
# which is the documented replacement and the sandbox --full-auto expanded to.
|
|
35
|
+
# `codex exec` is the non-interactive subcommand: it runs at approval "never"
|
|
36
|
+
# with no --ask-for-approval flag, so --sandbox workspace-write alone keeps the
|
|
37
|
+
# loop fully autonomous (verified against codex 0.132.0: no approval prompt).
|
|
34
38
|
# Alternative: "exec --dangerously-bypass-approvals-and-sandbox" (legacy, no sandbox)
|
|
35
|
-
PROVIDER_AUTONOMOUS_FLAG="exec --
|
|
39
|
+
PROVIDER_AUTONOMOUS_FLAG="exec --sandbox workspace-write --skip-git-repo-check"
|
|
36
40
|
PROVIDER_PROMPT_FLAG=""
|
|
37
41
|
PROVIDER_PROMPT_POSITIONAL=true
|
|
38
42
|
|
|
@@ -124,7 +128,7 @@ provider_version() {
|
|
|
124
128
|
provider_invoke() {
|
|
125
129
|
local prompt="$1"
|
|
126
130
|
shift
|
|
127
|
-
codex exec --
|
|
131
|
+
codex exec --sandbox workspace-write --skip-git-repo-check \
|
|
128
132
|
--model "$PROVIDER_MODEL_DEVELOPMENT" \
|
|
129
133
|
"$prompt" "$@"
|
|
130
134
|
}
|
|
@@ -182,11 +186,13 @@ resolve_model_for_tier() {
|
|
|
182
186
|
|
|
183
187
|
# Tier-aware invocation.
|
|
184
188
|
#
|
|
185
|
-
#
|
|
186
|
-
#
|
|
187
|
-
#
|
|
188
|
-
#
|
|
189
|
-
#
|
|
189
|
+
# Aligned with codex CLI 0.132.0 (verified: --full-auto deprecated/removed
|
|
190
|
+
# from `codex exec --help`). `codex exec` is the non-interactive subcommand and
|
|
191
|
+
# runs at approval "never" with no --ask-for-approval flag, so --sandbox
|
|
192
|
+
# workspace-write alone keeps the loop autonomous (verified: no approval prompt
|
|
193
|
+
# on codex 0.132.0). workspace-write is the documented --full-auto replacement
|
|
194
|
+
# and the safer default (scoped disk writes) over danger-full-access; readable
|
|
195
|
+
# in process listings.
|
|
190
196
|
#
|
|
191
197
|
# Optional env knobs:
|
|
192
198
|
# LOKI_CODEX_WEB_SEARCH=true enable codex --search (live web)
|
|
@@ -227,8 +233,7 @@ provider_invoke_with_tier() {
|
|
|
227
233
|
LOKI_CODEX_REASONING_EFFORT="$effort" \
|
|
228
234
|
CODEX_MODEL_REASONING_EFFORT="$effort" \
|
|
229
235
|
codex exec \
|
|
230
|
-
--
|
|
231
|
-
--sandbox danger-full-access \
|
|
236
|
+
--sandbox workspace-write \
|
|
232
237
|
--skip-git-repo-check \
|
|
233
238
|
--model "$model" \
|
|
234
239
|
"${extra_flags[@]}" \
|
|
package/references/multi-provider.md
CHANGED
|
@@ -286,7 +286,7 @@ All CLI flags have been verified against actual CLI help output:
|
|
|
286
286
|
| Provider | Flag | Verified Version | Notes |
|
|
287
287
|
|----------|------|------------------|-------|
|
|
288
288
|
| Claude | `--dangerously-skip-permissions` | v2.1.34 | Autonomous mode |
|
|
289
|
-
| Codex | `--
|
|
289
|
+
| Codex | `--sandbox workspace-write` | v0.132.0 | Recommended (--full-auto deprecated 0.125+); legacy: `exec --dangerously-bypass-approvals-and-sandbox` |
|
|
290
290
|
| Cline | `--auto-approve` | latest | Autonomous mode |
|
|
291
291
|
| Aider | `--yes-always` | latest | Autonomous mode |
|
|
292
292
|
|
|
package/skills/model-selection.md
CHANGED
|
@@ -231,13 +231,16 @@ Claude models support an `effort` parameter that controls reasoning depth withou
|
|
|
231
231
|
|
|
232
232
|
**Note:** The effort parameter and thinking prefixes serve different purposes. Effort controls the model's internal reasoning budget; thinking prefixes guide the structure of the response.
|
|
233
233
|
|
|
234
|
-
### Codex --
|
|
234
|
+
### Codex --sandbox workspace-write Flag
|
|
235
235
|
|
|
236
|
-
Codex CLI
|
|
236
|
+
Codex CLI deprecated `--full-auto` in v0.125+ (removed from `codex exec --help`,
|
|
237
|
+
emits a deprecation warning if used). The documented replacement is
|
|
238
|
+
`--sandbox workspace-write`. The `exec` subcommand is non-interactive by default
|
|
239
|
+
(approval: never), so the sandbox flag alone keeps the loop autonomous:
|
|
237
240
|
|
|
238
241
|
```bash
|
|
239
|
-
# Recommended (
|
|
240
|
-
codex --
|
|
242
|
+
# Recommended (codex 0.125+)
|
|
243
|
+
codex exec --sandbox workspace-write "$prompt"
|
|
241
244
|
|
|
242
245
|
# Legacy (still supported)
|
|
243
246
|
codex exec --dangerously-bypass-approvals-and-sandbox "$prompt"
|
package/skills/providers.md
CHANGED
|
@@ -6,7 +6,7 @@ Loki Mode supports four AI providers for autonomous execution.
|
|
|
6
6
|
|
|
7
7
|
> **CLI Flags Verified:** The autonomous mode flags have been verified against actual CLI help output:
|
|
8
8
|
> - Claude: `--dangerously-skip-permissions` (verified)
|
|
9
|
-
> - Codex: `exec --
|
|
9
|
+
> - Codex: `exec --sandbox workspace-write --skip-git-repo-check` (the harness invocation; --skip-git-repo-check required on fresh non-git dirs; --full-auto deprecated in codex 0.125+, workspace-write is the documented replacement) or `exec --dangerously-bypass-approvals-and-sandbox` (legacy)
|
|
10
10
|
|
|
11
11
|
| Feature | Claude Code | OpenAI Codex | Cline CLI | Aider |
|
|
12
12
|
|---------|-------------|--------------|-----------|-------|
|
|
@@ -70,7 +70,7 @@ Task(model="haiku", ...) # Fast tier (parallelize)
|
|
|
70
70
|
**Invocation:**
|
|
71
71
|
```bash
|
|
72
72
|
# Recommended (codex 0.125+)
|
|
73
|
-
codex exec --
|
|
73
|
+
codex exec --sandbox workspace-write --skip-git-repo-check "$prompt"
|
|
74
74
|
|
|
75
75
|
# Legacy (still supported)
|
|
76
76
|
codex exec --dangerously-bypass-approvals-and-sandbox "$prompt"
|
package/skills/quality-gates.md
CHANGED
|
@@ -2,12 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
**Never ship code without passing all quality gates.**
|
|
4
4
|
|
|
5
|
-
## The 8
|
|
5
|
+
## The Quality Gates (8 default-on + 1 opt-in)
|
|
6
6
|
|
|
7
|
-
Every gate below is wired into the orchestration loop (`autonomy/run.sh`)
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
7
|
+
Every gate below is wired into the orchestration loop (`autonomy/run.sh`). The 8
|
|
8
|
+
numbered gates are default-on and block completion when they fail; the opt-in
|
|
9
|
+
gate (marked below) is default-OFF and runs only when its flag is set. The table
|
|
10
|
+
lists exactly what each gate detects, what it does NOT detect (so you never
|
|
11
|
+
over-trust a green gate), its opt-out flag, and its blocking behavior. Transcribe
|
|
12
|
+
this list verbatim; do not recompute it.
|
|
11
13
|
|
|
12
14
|
| # | Gate | Detects | Does NOT detect | Blocking | Opt-out flag |
|
|
13
15
|
|---|------|---------|-----------------|----------|--------------|
|
|
@@ -19,6 +21,7 @@ and its blocking behavior. Transcribe this list verbatim; do not recompute it.
|
|
|
19
21
|
| 6 | Test Mutation Detector | Assertion-value churn alongside implementation changes (test-fitting), low assertion density (`tests/detect-test-mutations.sh`); HIGH blocks | Logically-correct-but-weak assertions | Yes (HIGH blocks) | `LOKI_GATE_MUTATION=false` |
|
|
20
22
|
| 7 | Documentation Coverage | README presence, docs freshness within 10 commits, API docs for exported symbols in packages | Whether the docs are accurate or useful | Yes | `LOKI_GATE_DOC_COVERAGE=false` |
|
|
21
23
|
| 8 | Magic Modules Debate | Spec-vs-implementation debate findings on generated Magic Modules; BLOCK-severity findings block | Issues outside the Magic Modules debate scope | Yes (BLOCK severity) | `LOKI_GATE_MAGIC_DEBATE=false` |
|
|
24
|
+
| 9 (opt-in, default OFF) | Semantic Test-Authenticity | Fake tests that look real but verify nothing (literal-via-variable echo, mock-return echo, deleted assertions) that gates 5+6 miss (`tests/detect-semantic-test-problems.sh --block-high`); CRITICAL/HIGH block | Deep dataflow, legitimate computed-literal assertions, Python/shell tests (JS/TS only); MED/LOW are advisory | Only when enabled, and only on CRITICAL/HIGH; runs solely on a completion claim | Opt-IN: `LOKI_GATE_SEMANTIC_TESTS=true` to enable (default off = not invoked, never blocks) |
|
|
22
25
|
|
|
23
26
|
**Severity-based blocking** ties the review gates together: any Critical or High
|
|
24
27
|
finding blocks completion. Medium, Low, and cosmetic findings are advisory and
|
package/src/audit/index.js
CHANGED
|
@@ -83,6 +83,84 @@ function exportReport(type, opts) {
|
|
|
83
83
|
return compliance.exportReportJson(report);
|
|
84
84
|
}
|
|
85
85
|
|
|
86
|
+
/**
|
|
87
|
+
* Generate a compliance report as a plain object, with the agent-chain
|
|
88
|
+
* tamper-evidence verdict folded in.
|
|
89
|
+
*
|
|
90
|
+
* This is the object form intended for surfaces (e.g. the dashboard
|
|
91
|
+
* /api/compliance endpoint) that need the report as data rather than a
|
|
92
|
+
* pre-serialized string. It always reflects the REAL audit chain:
|
|
93
|
+
*
|
|
94
|
+
* - The report body is generated from the live audit entries
|
|
95
|
+
* (`_log.readEntries()`), never fabricated.
|
|
96
|
+
* - `chainIntegrity` is populated from `verifyChain()` so the report
|
|
97
|
+
* carries the true tamper-evidence state of the underlying chain.
|
|
98
|
+
* For the SOC2 report this fills the `chainIntegrity: null` slot the
|
|
99
|
+
* generator leaves for the caller; for the other report types it is
|
|
100
|
+
* attached under the same key for a uniform surface contract.
|
|
101
|
+
*
|
|
102
|
+
* When the chain has no entries the report is still returned honestly
|
|
103
|
+
* with `totalAuditEntries: 0` (an empty-but-valid report), never a
|
|
104
|
+
* fabricated "compliant" verdict.
|
|
105
|
+
*
|
|
106
|
+
* @param {string} type - 'soc2', 'iso27001', or 'gdpr'
|
|
107
|
+
* @param {object} [opts] - Report options (projectName, period, etc.)
|
|
108
|
+
* @returns {object} The compliance report object with chainIntegrity set.
|
|
109
|
+
*/
|
|
110
|
+
function getReport(type, opts) {
|
|
111
|
+
if (!_initialized) init();
|
|
112
|
+
var report = generateReport(type, opts);
|
|
113
|
+
// Fold the real tamper-evidence verdict into the report. Do not let a
|
|
114
|
+
// verification error fabricate a pass: capture it honestly instead.
|
|
115
|
+
try {
|
|
116
|
+
report.chainIntegrity = _log.verifyChain();
|
|
117
|
+
} catch (e) {
|
|
118
|
+
report.chainIntegrity = {
|
|
119
|
+
valid: false,
|
|
120
|
+
entries: report.totalAuditEntries || 0,
|
|
121
|
+
brokenAt: null,
|
|
122
|
+
error: 'chain verification failed: ' + String((e && e.message) || e),
|
|
123
|
+
};
|
|
124
|
+
}
|
|
125
|
+
return report;
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
/**
|
|
129
|
+
* CLI shim so a non-Node surface (e.g. the Python dashboard) can fetch a
|
|
130
|
+
* compliance report for a given project directory as JSON on stdout.
|
|
131
|
+
*
|
|
132
|
+
* This is the inverse of dashboard/audit.py's `_unified_cli()`
|
|
133
|
+
* (which lets the Node-side unified verifier read the Python chain).
|
|
134
|
+
*
|
|
135
|
+
* Invoked as:
|
|
136
|
+
* node src/audit/index.js report <type> <projectDir>
|
|
137
|
+
*
|
|
138
|
+
* <type> is one of soc2 | iso27001 | gdpr. <projectDir> is the project
|
|
139
|
+
* root whose .loki/audit/audit.jsonl chain is read. Prints a single JSON
|
|
140
|
+
* object to stdout. Returns exit 0 on success, 2 on usage error. If <projectDir> is omitted, the current working directory is used.
|
|
141
|
+
*
|
|
142
|
+
* The report is generated from the REAL chain; an absent/empty chain
|
|
143
|
+
* yields an honest empty report (totalAuditEntries: 0), not a fake pass.
|
|
144
|
+
*/
|
|
145
|
+
function _cli(argv) {
|
|
146
|
+
var args = argv || [];
|
|
147
|
+
var VALID_TYPES = { soc2: true, iso27001: true, gdpr: true };
|
|
148
|
+
if (args.length < 2 || args[0] !== 'report' || !VALID_TYPES[args[1]]) {
|
|
149
|
+
process.stdout.write(JSON.stringify({
|
|
150
|
+
error: 'usage: index.js report {soc2|iso27001|gdpr} <projectDir>',
|
|
151
|
+
}) + '\n');
|
|
152
|
+
return 2;
|
|
153
|
+
}
|
|
154
|
+
var type = args[1];
|
|
155
|
+
var projectDir = args[2] || process.cwd();
|
|
156
|
+
destroy();
|
|
157
|
+
init(projectDir);
|
|
158
|
+
var report = getReport(type);
|
|
159
|
+
destroy();
|
|
160
|
+
process.stdout.write(JSON.stringify(report) + '\n');
|
|
161
|
+
return 0;
|
|
162
|
+
}
|
|
163
|
+
|
|
86
164
|
/**
|
|
87
165
|
* Check if a provider is allowed by data residency policy.
|
|
88
166
|
*/
|
|
@@ -167,6 +245,7 @@ module.exports = {
|
|
|
167
245
|
verifyChain: verifyChain,
|
|
168
246
|
generateReport: generateReport,
|
|
169
247
|
exportReport: exportReport,
|
|
248
|
+
getReport: getReport,
|
|
170
249
|
checkProvider: checkProvider,
|
|
171
250
|
isAirGapped: isAirGapped,
|
|
172
251
|
readEntries: readEntries,
|
|
@@ -177,3 +256,8 @@ module.exports = {
|
|
|
177
256
|
verifyUnified: verifyUnified,
|
|
178
257
|
writeWitness: writeWitness,
|
|
179
258
|
};
|
|
259
|
+
|
|
260
|
+
// CLI entry point: `node src/audit/index.js report <type> <projectDir>`.
|
|
261
|
+
if (require.main === module) {
|
|
262
|
+
process.exit(_cli(process.argv.slice(2)));
|
|
263
|
+
}
|