loki-mode 7.26.0 → 7.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -13
- package/SKILL.md +11 -2
- package/VERSION +1 -1
- package/autonomy/completion-council.sh +310 -6
- package/autonomy/context-tracker.py +32 -7
- package/autonomy/grill.sh +321 -0
- package/autonomy/lib/trust_metrics.py +636 -0
- package/autonomy/loki +142 -0
- package/autonomy/prd-checklist.sh +248 -14
- package/autonomy/run.sh +283 -32
- package/autonomy/spec.sh +646 -0
- package/autonomy/verify.sh +1130 -0
- package/dashboard/__init__.py +1 -1
- package/dashboard/static/index.html +1 -1
- package/docs/COMPARISON.md +9 -9
- package/docs/COMPETITIVE-ANALYSIS.md +18 -37
- package/docs/INSTALLATION.md +1 -1
- package/docs/auto-claude-comparison.md +9 -6
- package/docs/certification/01-core-concepts/lesson.md +3 -3
- package/docs/competitive/emergence-others-analysis.md +1 -1
- package/docs/competitive/replit-lovable-analysis.md +1 -1
- package/docs/cursor-comparison.md +1 -1
- package/docs/prd-purple-lab-platform.md +1 -1
- package/docs/show-hn-post.md +2 -2
- package/loki-ts/dist/loki.js +2 -2
- package/mcp/__init__.py +1 -1
- package/package.json +2 -1
- package/providers/codex.sh +3 -2
- package/references/agent-types.md +9 -9
- package/references/agents.md +8 -8
- package/references/business-ops.md +1 -1
- package/references/competitive-analysis.md +1 -1
- package/skills/agents.md +3 -3
- package/skills/providers.md +3 -3
- package/skills/quality-gates.md +46 -0
package/autonomy/run.sh
CHANGED
|
@@ -1226,6 +1226,94 @@ emit_event_json() {
|
|
|
1226
1226
|
log_debug "Event: $event_type - $json_data"
|
|
1227
1227
|
}
|
|
1228
1228
|
|
|
1229
|
+
# Trust-layer metrics event writer (benchmark program section 3). Appends one
|
|
1230
|
+
# durable record per trust event to .loki/metrics/trust-events.jsonl via the
|
|
1231
|
+
# Python writer (single source of truth for the JSONL schema). This is ADDITIVE
|
|
1232
|
+
# and purely a side effect: it writes nothing to stdout, ignores all errors, and
|
|
1233
|
+
# never alters control flow or any caller's return value. The single-state
|
|
1234
|
+
# control files (evidence-block.json, gate-failure-count.json) are untouched;
|
|
1235
|
+
# this log exists because those files are erased on the successful-run path,
|
|
1236
|
+
# losing exactly the self-correction events the trust metrics publish.
|
|
1237
|
+
# Resolve a stable, UNIQUE-PER-RUN id for the trust event log.
#
# The cross-run denominators (block rate, gate distribution) require ids that
# are distinct per run. A persisted per-run file is the source of truth, NOT
# LOKI_SESSION_ID:
#   - On `loki start ./prd.md`, LOKI_SESSION_ID is unset entirely.
#   - On `loki run <issue>`, LOKI_SESSION_ID is the issue NUMBER, which is
#     stable across re-runs by design (so `loki stop <n>` works); using it
#     would merge every re-run of the same issue into one bucket and skew the
#     rates.
# So a fresh run always MINTS a new unique id into .loki/state/trust-run-id,
# and every later event in that run reads it back. LOKI_SESSION_ID is only a
# last-resort fallback when no minted file exists (e.g. an event fired before
# any run_start, which the aggregator then treats as un-instrumented anyway).
# Events never join to proof.json (Metrics 1-3 are events-only, Metric 4 is
# proofs-only), so intra-log uniqueness is the only requirement.
#
# Usage: _loki_trust_run_id [--new]
#   --new    mint a fresh id, persist it to <loki_dir>/state/trust-run-id, and
#            print it; without it, print the already-resolved id.
# Globals:  LOKI_DIR, TARGET_DIR, LOKI_SESSION_ID (all read-only here).
# Outputs:  the id on stdout with no trailing newline; empty output when
#           neither a persisted id nor LOKI_SESSION_ID is available.
# Returns:  always 0. Nothing is written to stderr, even when the state
#           directory or id file cannot be created.
_loki_trust_run_id() {
    local loki_dir="${LOKI_DIR:-${TARGET_DIR:-.}/.loki}"
    local id_file="$loki_dir/state/trust-run-id"
    if [ "${1:-}" = "--new" ]; then
        # Fresh run: mint a new unique id (UTC timestamp + shell pid + short
        # random) and persist it as the source of truth for this run's events.
        local new_id
        new_id="run-$(date -u +%Y%m%d%H%M%S)-$$-${RANDOM:-0}"
        mkdir -p "$loki_dir/state" 2>/dev/null || true
        # Redirections are applied left to right, so a trailing 2>/dev/null
        # would not hide a failure to open "$id_file". Grouping the write puts
        # the stderr redirect in place before the file redirect is attempted.
        { printf '%s' "$new_id" > "$id_file"; } 2>/dev/null || true
        # The id is still returned when persisting failed, so the caller can
        # export it for the current process.
        printf '%s' "$new_id"
        return 0
    fi
    # Read path: the minted per-run file wins over LOKI_SESSION_ID so a resume
    # in a separate process (no exported LOKI_TRUST_RUN_ID) still resolves to
    # the same run, and a stable issue-number session id never collapses
    # re-runs. -s also rejects an empty id file.
    if [ -s "$id_file" ]; then
        cat "$id_file" 2>/dev/null || true
        return 0
    fi
    if [ -n "${LOKI_SESSION_ID:-}" ]; then
        printf '%s' "$LOKI_SESSION_ID"
        return 0
    fi
    # No persisted id and no session id: empty -> writer records "unknown".
    printf '%s' ""
}
|
|
1278
|
+
|
|
1279
|
+
# Usage: record_trust_event_bash <event_type> [key=value ...]
#
# Hands one trust event to record_trust_event() in
# $SCRIPT_DIR/lib/trust_metrics.py. Best-effort side effect only: all output
# of the writer is discarded, every failure is ignored, and the function
# always returns 0.
#
# Arguments:
#   $1    event type; an empty or missing type is a no-op.
#   $2..  extra fields as key=value (split on the first "="). Arguments
#         without "=" are ignored. The keys "run_id" and "iteration" are
#         reserved (they are supplied below) and are dropped if passed here,
#         because a duplicate keyword would make the writer call raise and
#         the whole event would be lost.
# Globals (read):
#   SCRIPT_DIR          locates lib/trust_metrics.py; no-op when absent.
#   LOKI_DIR/TARGET_DIR resolve the .loki directory passed to the writer.
#   LOKI_TRUST_RUN_ID   overrides the resolved run id (the run_start site
#                       sets it to the freshly minted id so the first event
#                       matches); otherwise _loki_trust_run_id is consulted.
#   ITERATION_COUNT     forwarded as the event's iteration (default 0).
record_trust_event_bash() {
    # ${1:-} / ${SCRIPT_DIR:-} keep this helper from aborting the caller when
    # the script runs with `set -u`.
    local event_type="${1:-}"
    shift || true
    [ -n "$event_type" ] || return 0
    local tm_mod="${SCRIPT_DIR:-}/lib/trust_metrics.py"
    [ -f "$tm_mod" ] || return 0
    command -v python3 >/dev/null 2>&1 || return 0
    local loki_dir="${LOKI_DIR:-${TARGET_DIR:-.}/.loki}"
    local run_id="${LOKI_TRUST_RUN_ID:-$(_loki_trust_run_id)}"
    # Pass kv pairs as argv so Python parses (no shell JSON building). All
    # values stay strings except where the reader coerces (iteration -> int).
    _TM_LOKI_DIR="$loki_dir" \
    _TM_MOD_PATH="$tm_mod" \
    _TM_EVENT_TYPE="$event_type" \
    _TM_RUN_ID="$run_id" \
    _TM_ITERATION="${ITERATION_COUNT:-0}" \
    python3 - "$@" <<'TRUST_EVENT_PY' >/dev/null 2>&1 || true
import os, sys, importlib.util
spec = importlib.util.spec_from_file_location("trust_metrics", os.environ["_TM_MOD_PATH"])
tm = importlib.util.module_from_spec(spec)
spec.loader.exec_module(tm)
# Keywords passed explicitly below; a same-named key=value must not be
# forwarded through **fields or the call raises TypeError.
reserved = ("run_id", "iteration")
fields = {}
for arg in sys.argv[1:]:
    key, sep, value = arg.partition("=")
    if sep and key not in reserved:
        fields[key] = value
tm.record_trust_event(
    os.environ["_TM_LOKI_DIR"],
    os.environ["_TM_EVENT_TYPE"],
    run_id=os.environ.get("_TM_RUN_ID", "") or None,
    iteration=os.environ.get("_TM_ITERATION", "0"),
    **fields,
)
TRUST_EVENT_PY
}
|
|
1316
|
+
|
|
1229
1317
|
# v7.0.2: Bash helper to emit a managed-agents event to the dashboard's
|
|
1230
1318
|
# managed event log (.loki/managed/events.ndjson). Mirrors the Python
|
|
1231
1319
|
# emit_managed_event helper so bash callers can land events in the same
|
|
@@ -2478,6 +2566,26 @@ except Exception:
|
|
|
2478
2566
|
local ts
|
|
2479
2567
|
ts="$(date -u +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || date)"
|
|
2480
2568
|
|
|
2569
|
+
# v7.28.0: evidence-gate inconclusive line. When the evidence gate could not
|
|
2570
|
+
# establish a diff baseline (no git repo, or no run-start SHA), it records a
|
|
2571
|
+
# durable .loki/state/evidence-inconclusive.json instead of silently passing.
|
|
2572
|
+
# Surface one honest line so the user knows completion was not independently
|
|
2573
|
+
# verified. The record is removed by the gate on any conclusive run.
|
|
2574
|
+
local evidence_inconclusive_line=""
|
|
2575
|
+
local _inc_file="$loki_dir/state/evidence-inconclusive.json"
|
|
2576
|
+
if [ -f "$_inc_file" ]; then
|
|
2577
|
+
local _inc_reason
|
|
2578
|
+
_inc_reason="$(python3 -c "import json,sys
|
|
2579
|
+
try:
|
|
2580
|
+
d=json.load(open(sys.argv[1]))
|
|
2581
|
+
print(d.get('reason','') if d.get('inconclusive') else '')
|
|
2582
|
+
except Exception:
|
|
2583
|
+
print('')" "$_inc_file" 2>/dev/null)"
|
|
2584
|
+
if [ -n "$_inc_reason" ]; then
|
|
2585
|
+
evidence_inconclusive_line="Evidence gate: inconclusive (${_inc_reason}) - completion not independently verified"
|
|
2586
|
+
fi
|
|
2587
|
+
fi
|
|
2588
|
+
|
|
2481
2589
|
# ---- Durable human-readable file: .loki/COMPLETION.txt --------------------
|
|
2482
2590
|
{
|
|
2483
2591
|
echo "Loki Mode run summary"
|
|
@@ -2508,6 +2616,10 @@ except Exception:
|
|
|
2508
2616
|
fi
|
|
2509
2617
|
echo "Tasks: pending=$pending in_progress=$in_progress completed=$completed failed=$failed"
|
|
2510
2618
|
echo ""
|
|
2619
|
+
if [ -n "$evidence_inconclusive_line" ]; then
|
|
2620
|
+
echo "$evidence_inconclusive_line"
|
|
2621
|
+
echo ""
|
|
2622
|
+
fi
|
|
2511
2623
|
echo "Review the work:"
|
|
2512
2624
|
echo " $review_cmd"
|
|
2513
2625
|
echo ""
|
|
@@ -2916,7 +3028,7 @@ spawn_worktree_session() {
|
|
|
2916
3028
|
>> "$log_file" 2>&1 || _wt_exit=$?
|
|
2917
3029
|
;;
|
|
2918
3030
|
codex)
|
|
2919
|
-
codex exec --full-auto \
|
|
3031
|
+
codex exec --full-auto --skip-git-repo-check \
|
|
2920
3032
|
"Loki Mode: $task_prompt. Read .loki/CONTINUITY.md for context." \
|
|
2921
3033
|
>> "$log_file" 2>&1 || _wt_exit=$?
|
|
2922
3034
|
;;
|
|
@@ -3117,7 +3229,7 @@ Output ONLY the resolved file content with no conflict markers. No explanations.
|
|
|
3117
3229
|
resolution=$(claude --dangerously-skip-permissions -p "$conflict_prompt" --output-format text 2>/dev/null)
|
|
3118
3230
|
;;
|
|
3119
3231
|
codex)
|
|
3120
|
-
resolution=$(codex exec --full-auto "$conflict_prompt" 2>/dev/null)
|
|
3232
|
+
resolution=$(codex exec --full-auto --skip-git-repo-check "$conflict_prompt" 2>/dev/null)
|
|
3121
3233
|
;;
|
|
3122
3234
|
cline)
|
|
3123
3235
|
resolution=$(invoke_cline_capture "$conflict_prompt" 2>/dev/null)
|
|
@@ -4602,6 +4714,57 @@ print_ttfv_next_steps() {
|
|
|
4602
4714
|
return 0
|
|
4603
4715
|
}
|
|
4604
4716
|
|
|
4717
|
+
# _read_iteration_cost <iteration>
#
# Emit "input output cost cache_read cache_creation" (one space-separated
# line) for the given iteration. Source precedence:
#   1. .loki/metrics/result-cost-<iteration>.json - the authoritative
#      result-cost file written by the embedded stream parser (Claude's own
#      total_cost_usd + usage, slug/symlink-independent).
#   2. .loki/context/tracking.json - the context-tracker-derived estimate;
#      the LAST per_iteration entry whose "iteration" equals <iteration>.
#   3. Neither file present: all zeros.
# Paths are relative to the current working directory.
#
# Best-effort: a missing python3, unreadable/malformed JSON, a non-integer
# <iteration> on the tracking path, or a missing/null field yields 0 for the
# affected values ("0 0 0 0 0" on outright failure) and never aborts.
# Returns 0.
_read_iteration_cost() {
    local iteration="${1:-}"
    local result_cost_file=".loki/metrics/result-cost-${iteration}.json"
    local tracking_file=".loki/context/tracking.json"
    local source_kind source_file

    if [ -f "$result_cost_file" ]; then
        source_kind="result"
        source_file="$result_cost_file"
    elif [ -f "$tracking_file" ]; then
        source_kind="tracking"
        source_file="$tracking_file"
    else
        echo "0 0 0 0 0"
        return 0
    fi

    # Path and iteration travel as argv rather than being spliced into the
    # Python source, so unusual characters cannot break or alter the script.
    python3 -c '
import json, sys
kind, path, iteration = sys.argv[1:4]
try:
    with open(path) as fh:
        data = json.load(fh)
    if kind == "result":
        rec, cost_key = data, "total_cost_usd"
    else:
        want = int(iteration)
        matches = [i for i in data.get("per_iteration", []) if i.get("iteration") == want]
        rec, cost_key = (matches[-1] if matches else {}), "cost_usd"
    # "or 0" maps JSON null to 0 so the caller never reads the text "None".
    print(
        rec.get("input_tokens", 0) or 0,
        rec.get("output_tokens", 0) or 0,
        rec.get(cost_key, 0) or 0,
        rec.get("cache_read_tokens", 0) or 0,
        rec.get("cache_creation_tokens", 0) or 0,
    )
except Exception:
    print(0, 0, 0, 0, 0)
' "$source_kind" "$source_file" "$iteration" 2>/dev/null || echo "0 0 0 0 0"
}
|
|
4767
|
+
|
|
4605
4768
|
track_iteration_complete() {
|
|
4606
4769
|
local iteration="$1"
|
|
4607
4770
|
local exit_code="${2:-0}"
|
|
@@ -4684,32 +4847,14 @@ track_iteration_complete() {
|
|
|
4684
4847
|
local phase="${LAST_KNOWN_PHASE:-}"
|
|
4685
4848
|
[ -z "$phase" ] && phase=$(python3 -c "import json; print(json.load(open('.loki/state/orchestrator.json')).get('currentPhase', 'unknown'))" 2>/dev/null || echo "unknown")
|
|
4686
4849
|
|
|
4687
|
-
# Read token data
|
|
4850
|
+
    # Read token data, preferring Claude's authoritative result-cost file over
|
|
4851
|
+
# the context-tracker estimate (v7.28.0 cost-capture fix). See
|
|
4852
|
+
# _read_iteration_cost for precedence rationale.
|
|
4688
4853
|
# v6.82.0: also capture cache_read_tokens / cache_creation_tokens for
|
|
4689
4854
|
# prompt-cache hit-rate analysis (S1.1 prompt restructure).
|
|
4690
4855
|
local iter_input=0 iter_output=0 iter_cost=0
|
|
4691
4856
|
local iter_cache_read=0 iter_cache_creation=0
|
|
4692
|
-
|
|
4693
|
-
read iter_input iter_output iter_cost iter_cache_read iter_cache_creation < <(python3 -c "
|
|
4694
|
-
import json
|
|
4695
|
-
try:
|
|
4696
|
-
t = json.load(open('.loki/context/tracking.json'))
|
|
4697
|
-
iters = t.get('per_iteration', [])
|
|
4698
|
-
match = [i for i in iters if i.get('iteration') == $iteration]
|
|
4699
|
-
if match:
|
|
4700
|
-
m = match[-1]
|
|
4701
|
-
print(
|
|
4702
|
-
m.get('input_tokens', 0),
|
|
4703
|
-
m.get('output_tokens', 0),
|
|
4704
|
-
m.get('cost_usd', 0),
|
|
4705
|
-
m.get('cache_read_tokens', 0),
|
|
4706
|
-
m.get('cache_creation_tokens', 0),
|
|
4707
|
-
)
|
|
4708
|
-
else:
|
|
4709
|
-
print(0, 0, 0, 0, 0)
|
|
4710
|
-
except: print(0, 0, 0, 0, 0)
|
|
4711
|
-
" 2>/dev/null || echo "0 0 0 0 0")
|
|
4712
|
-
fi
|
|
4857
|
+
read -r iter_input iter_output iter_cost iter_cache_read iter_cache_creation < <(_read_iteration_cost "$iteration")
|
|
4713
4858
|
|
|
4714
4859
|
cat > ".loki/metrics/efficiency/iteration-${iteration}.json" << EFF_EOF
|
|
4715
4860
|
{
|
|
@@ -6551,6 +6696,13 @@ print(counts[gate_name])
|
|
|
6551
6696
|
loki_crash_friction "gate_failure" "gate=${gate_name} consecutive=${count}" >/dev/null 2>&1 || true
|
|
6552
6697
|
fi
|
|
6553
6698
|
|
|
6699
|
+
# Trust-metrics: append a durable per-failure record so the gate-failure
|
|
6700
|
+
# distribution survives clear_gate_failure (which resets the running
|
|
6701
|
+
# counter). CRITICAL: this function's stdout IS its return value, so the
|
|
6702
|
+
# write is fully stdout-suppressed and best-effort; it cannot change the
|
|
6703
|
+
# echoed count or any gate behavior.
|
|
6704
|
+
record_trust_event_bash "gate_failure" "gate=${gate_name}" "consecutive=${count}" >/dev/null 2>&1 || true
|
|
6705
|
+
|
|
6554
6706
|
echo "$count"
|
|
6555
6707
|
}
|
|
6556
6708
|
|
|
@@ -7500,7 +7652,7 @@ BUILD_PROMPT
|
|
|
7500
7652
|
--output-format text > "$review_output" 2>/dev/null
|
|
7501
7653
|
;;
|
|
7502
7654
|
codex)
|
|
7503
|
-
codex exec --full-auto "$prompt_text" \
|
|
7655
|
+
codex exec --full-auto --skip-git-repo-check "$prompt_text" \
|
|
7504
7656
|
> "$review_output" 2>/dev/null
|
|
7505
7657
|
;;
|
|
7506
7658
|
cline)
|
|
@@ -7715,7 +7867,7 @@ ADVERSARIAL_EOF
|
|
|
7715
7867
|
;;
|
|
7716
7868
|
codex)
|
|
7717
7869
|
if command -v codex &>/dev/null; then
|
|
7718
|
-
codex exec --full-auto "$adversarial_prompt" \
|
|
7870
|
+
codex exec --full-auto --skip-git-repo-check "$adversarial_prompt" \
|
|
7719
7871
|
> "$result_file" 2>/dev/null || true
|
|
7720
7872
|
fi
|
|
7721
7873
|
;;
|
|
@@ -11900,6 +12052,19 @@ run_autonomous() {
|
|
|
11900
12052
|
_LOKI_RUN_START_SHA="$(cat "$_start_sha_file" 2>/dev/null || echo "")"
|
|
11901
12053
|
export _LOKI_RUN_START_SHA
|
|
11902
12054
|
|
|
12055
|
+
# Trust-metrics instrumentation marker: record one run_start event per
|
|
12056
|
+
# fresh run so the trust-metrics denominator counts ONLY instrumented runs.
|
|
12057
|
+
# This is what lets the aggregator distinguish "0 blocks measured" from
|
|
12058
|
+
# "this run predates instrumentation" (the central honesty rule). Additive,
|
|
12059
|
+
# best-effort, stdout-silent; never affects control flow. Mint a fresh
|
|
12060
|
+
# per-run id here and export it so every later event in this run shares it
|
|
12061
|
+
# (LOKI_SESSION_ID is absent on the `loki start` path).
|
|
12062
|
+
if [ "${ITERATION_COUNT:-0}" -eq 0 ]; then
|
|
12063
|
+
LOKI_TRUST_RUN_ID="$(_loki_trust_run_id --new)"
|
|
12064
|
+
export LOKI_TRUST_RUN_ID
|
|
12065
|
+
record_trust_event_bash "run_start" "start_sha=${_LOKI_RUN_START_SHA:-}" 2>/dev/null || true
|
|
12066
|
+
fi
|
|
12067
|
+
|
|
11903
12068
|
# Notify dashboard of active project directory (for AI Chat cross-directory usage)
|
|
11904
12069
|
if command -v curl &>/dev/null; then
|
|
11905
12070
|
local project_cwd
|
|
@@ -12244,8 +12409,15 @@ except Exception as exc:
|
|
|
12244
12409
|
claude)
|
|
12245
12410
|
# Claude: Full features with stream-json output and agent tracking
|
|
12246
12411
|
# Uses dynamic tier for model selection based on RARV phase
|
|
12247
|
-
# Pass tier to
|
|
12248
|
-
|
|
12412
|
+
# Pass tier + iteration to the embedded stream parser via the
|
|
12413
|
+
# environment. A bare `VAR=val cmd | parser` prefix applies ONLY
|
|
12414
|
+
# to `cmd` (claude) and does NOT cross the pipe to the parser
|
|
12415
|
+
# subprocess, so these must be exported into the shell env first.
|
|
12416
|
+
# LOKI_ITERATION lets the parser stamp the authoritative
|
|
12417
|
+
# result-cost file under the correct iteration index.
|
|
12418
|
+
export LOKI_CURRENT_MODEL="$tier_param"
|
|
12419
|
+
export LOKI_ITERATION="$ITERATION_COUNT"
|
|
12420
|
+
{ \
|
|
12249
12421
|
claude "${_loki_claude_argv[@]}" -p "$prompt" \
|
|
12250
12422
|
--output-format stream-json --verbose 2>&1 | \
|
|
12251
12423
|
tee -a "$log_file" "$agent_log" "$iter_output" | \
|
|
@@ -12558,6 +12730,34 @@ def process_stream():
|
|
|
12558
12730
|
active_agents[orchestrator_id]["tasks_completed"].append(f"{tool_count} tools used")
|
|
12559
12731
|
|
|
12560
12732
|
save_agents()
|
|
12733
|
+
|
|
12734
|
+
# Authoritative cost capture (path/slug/symlink-independent).
|
|
12735
|
+
# Claude'"'"'s result message carries its own total_cost_usd plus a
|
|
12736
|
+
# full usage object. The context-tracker session-file path is
|
|
12737
|
+
# brittle (slug derivation must guess Claude'"'"'s naming), so this
|
|
12738
|
+
# stamps the authoritative number to a per-iteration file that
|
|
12739
|
+
# the efficiency writer prefers. Best-effort: a malformed or
|
|
12740
|
+
# missing field must never break the iteration loop.
|
|
12741
|
+
try:
|
|
12742
|
+
_iter = os.environ.get("LOKI_ITERATION", "0")
|
|
12743
|
+
_u = data.get("usage", {}) or {}
|
|
12744
|
+
_rec = {
|
|
12745
|
+
"total_cost_usd": data.get("total_cost_usd"),
|
|
12746
|
+
"input_tokens": _u.get("input_tokens", 0),
|
|
12747
|
+
"output_tokens": _u.get("output_tokens", 0),
|
|
12748
|
+
"cache_read_tokens": _u.get("cache_read_input_tokens", 0),
|
|
12749
|
+
"cache_creation_tokens": _u.get("cache_creation_input_tokens", 0),
|
|
12750
|
+
}
|
|
12751
|
+
if _rec["total_cost_usd"] is not None:
|
|
12752
|
+
os.makedirs(".loki/metrics", exist_ok=True)
|
|
12753
|
+
_p = ".loki/metrics/result-cost-" + str(_iter) + ".json"
|
|
12754
|
+
_tmp = _p + ".tmp"
|
|
12755
|
+
with open(_tmp, "w") as _f:
|
|
12756
|
+
json.dump(_rec, _f)
|
|
12757
|
+
os.replace(_tmp, _p)
|
|
12758
|
+
except Exception:
|
|
12759
|
+
pass
|
|
12760
|
+
|
|
12561
12761
|
print(f"\n{GREEN}[Session complete]{NC}", flush=True)
|
|
12562
12762
|
is_error = data.get("is_error", False)
|
|
12563
12763
|
sys.exit(1 if is_error else 0)
|
|
@@ -12586,7 +12786,7 @@ if __name__ == "__main__":
|
|
|
12586
12786
|
# Uses dynamic tier from RARV phase (tier_param already set above)
|
|
12587
12787
|
{ LOKI_CODEX_REASONING_EFFORT="$tier_param" \
|
|
12588
12788
|
CODEX_MODEL_REASONING_EFFORT="$tier_param" \
|
|
12589
|
-
codex exec --full-auto \
|
|
12789
|
+
codex exec --full-auto --skip-git-repo-check \
|
|
12590
12790
|
"$prompt" 2>&1 | tee -a "$log_file" "$agent_log" "$iter_output"; \
|
|
12591
12791
|
} && exit_code=0 || exit_code=$?
|
|
12592
12792
|
;;
|
|
@@ -12990,7 +13190,36 @@ if __name__ == "__main__":
|
|
|
12990
13190
|
case "${gate_failures:-}" in
|
|
12991
13191
|
*code_review,*|*code_review_ESCALATED*) _gate_block_for_completion="code_review" ;;
|
|
12992
13192
|
esac
|
|
12993
|
-
|
|
13193
|
+
# DROP-FIX (v7.28): check_completion_promise -> check_task_completion_signal
|
|
13194
|
+
# CONSUMES the completion signal (rm -f) on the FIRST successful call.
|
|
13195
|
+
# The completion-promise chain below calls it up to five times in one
|
|
13196
|
+
# iteration (reverify guard, code-review arm, evidence arm, held-out
|
|
13197
|
+
# arm, success arm), so the first call consumed the claim and every
|
|
13198
|
+
# later arm saw nothing -- the success arm never fired and the run
|
|
13199
|
+
# iterated to max_iterations even though the agent had claimed done.
|
|
13200
|
+
# Fix: evaluate the claim EXACTLY ONCE here, capture it in
|
|
13201
|
+
# _completion_claimed, and have every arm test that variable. The
|
|
13202
|
+
# single call discards stdout (matching the prior call sites, which
|
|
13203
|
+
# also discarded it), so the task_completion_claim event still emits
|
|
13204
|
+
# exactly once. Consumption semantics are preserved: the claim is
|
|
13205
|
+
# consumed when evaluated; if a gate rejects it, the agent must
|
|
13206
|
+
# re-claim next iteration (see internal/DEMO-CLAIM-DROP-BUG.md).
|
|
13207
|
+
local _completion_claimed=0
|
|
13208
|
+
if check_completion_promise "$iter_output"; then
|
|
13209
|
+
_completion_claimed=1
|
|
13210
|
+
fi
|
|
13211
|
+
# MEDIUM-3: this completion-promise route evaluates the council hard
|
|
13212
|
+
# gates (evidence + held-out) without the council_evaluate freshness
|
|
13213
|
+
# step, so the held-out gate could read stale verification statuses
|
|
13214
|
+
# (and a stale reservation). Re-verify the checklist ONCE here, but
|
|
13215
|
+
# only when a completion claim is actually present (mirror the
|
|
13216
|
+
# check_completion_promise condition used by the gate chain below) so
|
|
13217
|
+
# verification does not run every iteration. Type-guarded and
|
|
13218
|
+
# best-effort: failure must never block the completion path.
|
|
13219
|
+
if [ "$_completion_claimed" = 1 ] && type council_reverify_checklist &>/dev/null; then
|
|
13220
|
+
council_reverify_checklist 2>/dev/null || true
|
|
13221
|
+
fi
|
|
13222
|
+
if [ -n "$_gate_block_for_completion" ] && [ "$_completion_claimed" = 1 ]; then
|
|
12994
13223
|
log_warn "Completion claim rejected: code review is BLOCKED for this iteration (Critical/High findings). Fix review issues before completion."
|
|
12995
13224
|
log_warn " Review details under .loki/quality/reviews/ ; gate_failures=${gate_failures}"
|
|
12996
13225
|
_gate_block_for_completion=""
|
|
@@ -13005,11 +13234,24 @@ if __name__ == "__main__":
|
|
|
13005
13234
|
# LOKI_EVIDENCE_GATE=0 (council_evidence_gate returns 0 immediately
|
|
13006
13235
|
# when disabled, so this branch never fires). Gate output (reason +
|
|
13007
13236
|
# opt-out hint) is printed by council_evidence_gate itself.
|
|
13008
|
-
elif
|
|
13237
|
+
elif [ "$_completion_claimed" = 1 ] && type council_evidence_gate &>/dev/null && ! council_evidence_gate; then
|
|
13009
13238
|
log_warn "Completion claim rejected: evidence gate found no proof of completion (empty diff vs run-start SHA, or red tests)."
|
|
13010
13239
|
log_warn " Details under .loki/council/evidence-block.json ; opt out with LOKI_EVIDENCE_GATE=0"
|
|
13011
13240
|
# Fall through; keep iterating until there is real evidence.
|
|
13012
|
-
|
|
13241
|
+
# v7.28.0: the held-out spec-eval gate must also guard the DEFAULT
|
|
13242
|
+
# completion-promise route, not only the interval-gated council path
|
|
13243
|
+
# (council_evaluate). Otherwise an agent can self-assert "done" and
|
|
13244
|
+
# exit as completion_promise_fulfilled while a held-out acceptance
|
|
13245
|
+
# check is failing, bypassing the anti-reward-hacking gate entirely.
|
|
13246
|
+
# Mirrors the evidence-gate block above. Opt-out: the gate's own
|
|
13247
|
+
# LOKI_HELDOUT_GATE=0 (council_heldout_gate returns 0 immediately
|
|
13248
|
+
# when disabled or when no held-out items are reserved, so this
|
|
13249
|
+
# branch never fires). Gate output is printed by council_heldout_gate.
|
|
13250
|
+
elif [ "$_completion_claimed" = 1 ] && type council_heldout_gate &>/dev/null && ! council_heldout_gate; then
|
|
13251
|
+
log_warn "Completion claim rejected: held-out spec-eval gate found failing held-out acceptance check(s)."
|
|
13252
|
+
log_warn " Details under .loki/council/heldout-block.json ; opt out with LOKI_HELDOUT_GATE=0"
|
|
13253
|
+
# Fall through; keep iterating until the held-out checks pass.
|
|
13254
|
+
elif [ "$_completion_claimed" = 1 ]; then
|
|
13013
13255
|
echo ""
|
|
13014
13256
|
if [ -n "$COMPLETION_PROMISE" ]; then
|
|
13015
13257
|
log_header "COMPLETION PROMISE FULFILLED: $COMPLETION_PROMISE"
|
|
@@ -13383,10 +13625,19 @@ check_human_intervention() {
|
|
|
13383
13625
|
if [ -f "$loki_dir/signals/COUNCIL_REVIEW_REQUESTED" ]; then
|
|
13384
13626
|
log_info "Council force-review requested from dashboard"
|
|
13385
13627
|
rm -f "$loki_dir/signals/COUNCIL_REVIEW_REQUESTED"
|
|
13628
|
+
# MEDIUM-3: this route evaluates the council hard gates directly without
|
|
13629
|
+
# the council_evaluate freshness step, so re-verify the checklist ONCE
|
|
13630
|
+
# before the gate chain to restore that invariant (refreshes held-out
|
|
13631
|
+
# statuses and repairs a stale reservation). Type-guarded, best-effort.
|
|
13632
|
+
if type council_reverify_checklist &>/dev/null; then
|
|
13633
|
+
council_reverify_checklist 2>/dev/null || true
|
|
13634
|
+
fi
|
|
13386
13635
|
if type council_checklist_gate &>/dev/null && ! council_checklist_gate; then
|
|
13387
13636
|
log_info "Council force-review: blocked by checklist hard gate"
|
|
13388
13637
|
elif type council_evidence_gate &>/dev/null && ! council_evidence_gate; then
|
|
13389
13638
|
log_info "Council force-review: blocked by evidence hard gate"
|
|
13639
|
+
elif type council_heldout_gate &>/dev/null && ! council_heldout_gate; then
|
|
13640
|
+
log_info "Council force-review: blocked by held-out spec-eval hard gate"
|
|
13390
13641
|
elif type council_vote &>/dev/null && council_vote; then
|
|
13391
13642
|
log_header "COMPLETION COUNCIL: FORCE REVIEW - PROJECT COMPLETE"
|
|
13392
13643
|
# BUG #17 fix: Write COMPLETED marker, generate council report, and
|