loki-mode 7.26.0 → 7.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -11
- package/SKILL.md +2 -2
- package/VERSION +1 -1
- package/autonomy/completion-council.sh +25 -0
- package/autonomy/lib/trust_metrics.py +636 -0
- package/autonomy/loki +93 -0
- package/autonomy/run.sh +113 -5
- package/autonomy/verify.sh +1075 -0
- package/dashboard/__init__.py +1 -1
- package/dashboard/static/index.html +1 -1
- package/docs/COMPARISON.md +9 -9
- package/docs/COMPETITIVE-ANALYSIS.md +18 -37
- package/docs/INSTALLATION.md +1 -1
- package/docs/auto-claude-comparison.md +9 -6
- package/docs/certification/01-core-concepts/lesson.md +3 -3
- package/docs/competitive/emergence-others-analysis.md +1 -1
- package/docs/competitive/replit-lovable-analysis.md +1 -1
- package/docs/cursor-comparison.md +1 -1
- package/docs/prd-purple-lab-platform.md +1 -1
- package/docs/show-hn-post.md +2 -2
- package/loki-ts/dist/loki.js +2 -2
- package/mcp/__init__.py +1 -1
- package/package.json +1 -1
- package/providers/codex.sh +3 -2
- package/references/agent-types.md +9 -9
- package/references/agents.md +8 -8
- package/references/business-ops.md +1 -1
- package/references/competitive-analysis.md +1 -1
- package/skills/agents.md +3 -3
- package/skills/providers.md +3 -3
package/autonomy/loki
CHANGED
|
@@ -554,12 +554,14 @@ show_help() {
|
|
|
554
554
|
echo " projects Multi-project registry management"
|
|
555
555
|
echo " audit [cmd] Agent audit log and quality scanning (log|scan)"
|
|
556
556
|
echo " heal <path> Legacy system healing (archaeology, stabilize, modernize)"
|
|
557
|
+
echo " verify [base] Deterministic PR verification (Autonomi Verify MVP; CI-gate exit codes)"
|
|
557
558
|
echo " review [opts] Standalone code review with quality gates (diff, staged, PR, files)"
|
|
558
559
|
echo " optimize Optimize prompts based on session history"
|
|
559
560
|
echo " enterprise Enterprise feature management (tokens, OIDC)"
|
|
560
561
|
echo " metrics [opts] Session productivity report (--json, --last N, --save, --share)"
|
|
561
562
|
echo " cost [opts] Transparent cost view: per-run/project spend + budget (--json, --last N)"
|
|
562
563
|
echo " trust [--json] Visible trust trajectory: council/gate pass-rate + interventions over runs [R4]"
|
|
564
|
+
echo " trust-metrics Trust-layer metrics: evidence-block rate, gate distribution, council split, cost/verified (--json)"
|
|
563
565
|
echo " dogfood Show self-development statistics"
|
|
564
566
|
echo " secrets [cmd] API key status and validation (status|validate)"
|
|
565
567
|
echo " reset [target] Reset session state (all|retries|failed)"
|
|
@@ -11355,6 +11357,29 @@ with open(manifest_path, 'w') as f:
|
|
|
11355
11357
|
# Modernize legacy codebases incrementally without breaking existing behavior.
|
|
11356
11358
|
#===============================================================================
|
|
11357
11359
|
|
|
11360
|
+
# ---------------------------------------------------------------------------
|
|
11361
|
+
# loki verify - Autonomi Verify (Verification-as-a-Service MVP)
|
|
11362
|
+
#
|
|
11363
|
+
# Thin dispatcher that sources autonomy/verify.sh and delegates to its
|
|
11364
|
+
# verify_main(). The verification core is deliberately standalone (it does NOT
|
|
11365
|
+
# enter the autonomous iteration loop): it computes a PR-style merge-base diff
|
|
11366
|
+
# and runs deterministic gates against the current tree, emitting a verdict and
|
|
11367
|
+
# a consolidated evidence document. Deterministic-only in this MVP (no LLM
|
|
11368
|
+
# review). Exit code is propagated to the caller so the command is CI-gate
|
|
11369
|
+
# usable.
|
|
11370
|
+
# ---------------------------------------------------------------------------
|
|
11371
|
+
cmd_verify() {
    # Dispatcher for `loki verify`: load verify.sh from the script directory
    # and hand every argument to its verify_main entry point.
    #
    # Globals (read): _LOKI_SCRIPT_DIR, RED, NC
    # Arguments:      forwarded unchanged to verify_main
    # Returns:        3 when verify.sh is missing; otherwise whatever
    #                 verify_main returns.
    local module_path="$_LOKI_SCRIPT_DIR/verify.sh"

    if [ -f "$module_path" ]; then
        # shellcheck source=/dev/null
        source "$module_path"
        # Last command in this branch: its status becomes the function's
        # return value, so the caller sees verify_main's exit code.
        verify_main "$@"
    else
        echo -e "${RED}Error: verify module not found at $module_path${NC}" >&2
        return 3
    fi
}
|
|
11382
|
+
|
|
11358
11383
|
cmd_heal_help() {
|
|
11359
11384
|
echo -e "${BOLD}loki heal${NC} - Legacy system healing (v6.67.0)"
|
|
11360
11385
|
echo ""
|
|
@@ -13502,6 +13527,9 @@ main() {
|
|
|
13502
13527
|
heal)
|
|
13503
13528
|
cmd_heal "$@"
|
|
13504
13529
|
;;
|
|
13530
|
+
verify)
|
|
13531
|
+
cmd_verify "$@"
|
|
13532
|
+
;;
|
|
13505
13533
|
migrate)
|
|
13506
13534
|
cmd_migrate "$@"
|
|
13507
13535
|
;;
|
|
@@ -13529,6 +13557,9 @@ main() {
|
|
|
13529
13557
|
trust)
|
|
13530
13558
|
cmd_trust "$@"
|
|
13531
13559
|
;;
|
|
13560
|
+
trust-metrics)
|
|
13561
|
+
cmd_trust_metrics "$@"
|
|
13562
|
+
;;
|
|
13532
13563
|
syslog)
|
|
13533
13564
|
cmd_syslog "$@"
|
|
13534
13565
|
;;
|
|
@@ -18775,6 +18806,68 @@ cmd_trust() {
|
|
|
18775
18806
|
python3 "$trust_mod" --loki-dir "$loki_dir" ${pass_args[@]+"${pass_args[@]}"}
|
|
18776
18807
|
}
|
|
18777
18808
|
|
|
18809
|
+
# Trust-layer metrics (benchmark program section 3): the four AVAILABLE-TODAY
|
|
18810
|
+
# metrics nobody else can publish, computed for THIS project from the durable
|
|
18811
|
+
# trust-events.jsonl log plus the .loki/proofs/ corpus. Honest by construction:
|
|
18812
|
+
# each metric reports its own n= and says "not instrumented" rather than a
|
|
18813
|
+
# fabricated zero. Single project only.
|
|
18814
|
+
cmd_trust_metrics() {
    # `loki trust-metrics`: validate the CLI flags, then run
    # lib/trust_metrics.py against this project's .loki directory.
    #
    # Globals (read): _LOKI_SCRIPT_DIR, LOKI_DIR, BOLD, RED, NC
    # Arguments:      --json | --no-cache | --help/-h (see help text below)
    # Outputs:        help text and the Python module's report on stdout;
    #                 every error diagnostic on stderr, so stdout stays clean
    #                 for consumers parsing the --json output.
    # Exit status:    exits 0 after --help, 1 on an unknown option or a
    #                 missing python3/module, 2 for --all-projects; otherwise
    #                 the status of the python3 invocation is returned.
    local pass_args=()
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --help|-h)
                echo -e "${BOLD}loki trust-metrics${NC} - Trust-layer metrics (single project)"
                echo ""
                echo "Usage: loki trust-metrics [options]"
                echo ""
                echo "Computes the four trust-layer metrics from this project's"
                echo ".loki artifacts and emits .loki/metrics/trust-metrics.json"
                echo "plus a human-readable table:"
                echo " 1. Evidence-gate block rate (runs that caught an unproven"
                echo " 'done' claim before honoring completion)"
                echo " 2. Gate failure distribution per run (median, p90, per-gate)"
                echo " 3. Council rejection / split-verdict rate"
                echo " 4. Cost-per-VERIFIED-task (local verified denominator)"
                echo ""
                echo "Sources: .loki/metrics/trust-events.jsonl (durable event log)"
                echo "and .loki/proofs/<id>/proof.json. A metric with no source"
                echo "artifact is reported 'not instrumented', never a fake 0."
                echo ""
                echo "Options:"
                echo " --json Machine-readable JSON output"
                echo " --no-cache Do not write trust-metrics.json"
                echo " --help, -h Show this help"
                echo ""
                echo "Scope: SINGLE PROJECT only. An --all-projects registry"
                echo "aggregator is out of scope; run this inside each project."
                exit 0
                ;;
            --json) pass_args+=("--json"); shift ;;
            --no-cache) pass_args+=("--no-cache"); shift ;;
            --all-projects)
                # Explicitly rejected (distinct exit code 2) rather than
                # falling into the generic unknown-option arm.
                echo -e "${RED}--all-projects is out of scope (single project only).${NC}" >&2
                echo "Run 'loki trust-metrics' inside each project directory." >&2
                exit 2
                ;;
            *)
                echo -e "${RED}Unknown option: $1${NC}" >&2
                echo "Run 'loki trust-metrics --help' for usage." >&2
                exit 1
                ;;
        esac
    done

    if ! command -v python3 &>/dev/null; then
        echo -e "${RED}python3 is required for trust metrics${NC}" >&2
        exit 1
    fi

    local tm_mod="$_LOKI_SCRIPT_DIR/lib/trust_metrics.py"
    if [ ! -f "$tm_mod" ]; then
        echo -e "${RED}trust_metrics.py not found at $tm_mod${NC}" >&2
        exit 1
    fi

    local loki_dir="${LOKI_DIR:-.loki}"
    # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array without
    # tripping `set -u` on older bash versions.
    python3 "$tm_mod" --loki-dir "$loki_dir" ${pass_args[@]+"${pass_args[@]}"}
}
|
|
18870
|
+
|
|
18778
18871
|
# Transparent cost view (R3): per-run + per-project spend, model routing, and
|
|
18779
18872
|
# budget status with the 80% warn line. Reuses efficiency_cost.collect_efficiency
|
|
18780
18873
|
# for the current-run aggregate (single source of truth) and reads .loki/proofs/
|
package/autonomy/run.sh
CHANGED
|
@@ -1226,6 +1226,94 @@ emit_event_json() {
|
|
|
1226
1226
|
log_debug "Event: $event_type - $json_data"
|
|
1227
1227
|
}
|
|
1228
1228
|
|
|
1229
|
+
# Trust-layer metrics event writer (benchmark program section 3). Appends one
|
|
1230
|
+
# durable record per trust event to .loki/metrics/trust-events.jsonl via the
|
|
1231
|
+
# Python writer (single source of truth for the JSONL schema). This is ADDITIVE
|
|
1232
|
+
# and purely a side effect: it writes nothing to stdout, ignores all errors, and
|
|
1233
|
+
# never alters control flow or any caller's return value. The single-state
|
|
1234
|
+
# control files (evidence-block.json, gate-failure-count.json) are untouched;
|
|
1235
|
+
# this log exists because those files are erased on the successful-run path,
|
|
1236
|
+
# losing exactly the self-correction events the trust metrics publish.
|
|
1237
|
+
# Resolve a stable, UNIQUE-PER-RUN id for the trust event log. The cross-run
|
|
1238
|
+
# denominators (block rate, gate distribution) require ids that are distinct per
|
|
1239
|
+
# run. A persisted per-run file is the source of truth, NOT LOKI_SESSION_ID:
|
|
1240
|
+
# - On `loki start ./prd.md`, LOKI_SESSION_ID is unset entirely.
|
|
1241
|
+
# - On `loki run <issue>`, LOKI_SESSION_ID is the issue NUMBER, which is stable
|
|
1242
|
+
# across re-runs by design (so `loki stop <n>` works); using it would merge
|
|
1243
|
+
# every re-run of the same issue into one bucket and skew the rates.
|
|
1244
|
+
# So a fresh run always MINTS a new unique id into .loki/state/trust-run-id, and
|
|
1245
|
+
# every later event in that run reads it back. LOKI_SESSION_ID is only a
|
|
1246
|
+
# last-resort fallback when no minted file exists (e.g. an event fired before
|
|
1247
|
+
# any run_start, which the aggregator then treats as un-instrumented anyway).
|
|
1248
|
+
# Events never join to proof.json (Metrics 1-3 are events-only, Metric 4 is
|
|
1249
|
+
# proofs-only), so intra-log uniqueness is the only requirement.
|
|
1250
|
+
# Usage: _loki_trust_run_id [--new]
|
|
1251
|
+
_loki_trust_run_id() {
    # Print the trust-event run id on stdout (no trailing newline).
    #
    # Globals (read): LOKI_DIR, TARGET_DIR, LOKI_SESSION_ID
    # Arguments:      --new  mint a fresh id and persist it to
    #                        <loki_dir>/state/trust-run-id
    #                 (none) resolve the id for the current run
    # Returns:        always 0; all filesystem work is best-effort.
    local loki_dir="${LOKI_DIR:-${TARGET_DIR:-.}/.loki}"
    local id_file="$loki_dir/state/trust-run-id"
    if [ "${1:-}" = "--new" ]; then
        # Fresh run: mint a new unique id (UTC timestamp + pid + short
        # random) and persist it as the source of truth for this run's events.
        local new_id
        new_id="run-$(date -u +%Y%m%d%H%M%S)-$$-${RANDOM:-0}"
        mkdir -p "$loki_dir/state" 2>/dev/null || true
        # Redirections are applied left to right, so a plain
        # `printf > file 2>/dev/null` still lets the shell report a failed
        # open of "$id_file" on stderr. Grouping the write puts the
        # 2>/dev/null in effect first, keeping the write truly silent.
        { printf '%s' "$new_id" > "$id_file"; } 2>/dev/null || true
        printf '%s' "$new_id"
        return 0
    fi
    # Read path: the minted per-run file wins over LOKI_SESSION_ID so a resume
    # in a separate process (no exported LOKI_TRUST_RUN_ID) still resolves to
    # the same run, and a stable issue-number session id never collapses re-runs.
    if [ -s "$id_file" ]; then
        cat "$id_file" 2>/dev/null || true
        return 0
    fi
    if [ -n "${LOKI_SESSION_ID:-}" ]; then
        printf '%s' "$LOKI_SESSION_ID"
        return 0
    fi
    # No persisted id and no session id: print nothing; the caller receives
    # an empty string.
    printf '%s' ""
}
|
|
1278
|
+
|
|
1279
|
+
# Usage: record_trust_event_bash <event_type> [key=value ...]
|
|
1280
|
+
# Pass LOKI_TRUST_RUN_ID in the environment to override the resolved id (the
|
|
1281
|
+
# run_start site sets it to the freshly minted id so the first event matches).
|
|
1282
|
+
record_trust_event_bash() {
    # Best-effort bridge to record_trust_event() in lib/trust_metrics.py.
    #
    # Globals (read): SCRIPT_DIR, LOKI_DIR, TARGET_DIR, LOKI_TRUST_RUN_ID,
    #                 ITERATION_COUNT
    # Arguments:      $1   event type
    #                 $2.. key=value pairs, forwarded as argv to Python
    # Outputs:        none (stdout and stderr of the Python call are discarded)
    # Returns:        always 0, including when the module or python3 is absent
    local event_type="$1"
    shift || true

    local tm_mod="$SCRIPT_DIR/lib/trust_metrics.py"
    if [ ! -f "$tm_mod" ] || ! command -v python3 >/dev/null 2>&1; then
        return 0
    fi

    local loki_dir="${LOKI_DIR:-${TARGET_DIR:-.}/.loki}"
    # An exported LOKI_TRUST_RUN_ID overrides the id resolved by the helper.
    local run_id="${LOKI_TRUST_RUN_ID:-$(_loki_trust_run_id)}"

    # Fixed inputs travel through the environment and the free-form
    # key=value pairs through argv, so no JSON is assembled in the shell.
    _TM_MOD_PATH="$tm_mod" \
    _TM_LOKI_DIR="$loki_dir" \
    _TM_EVENT_TYPE="$event_type" \
    _TM_RUN_ID="$run_id" \
    _TM_ITERATION="${ITERATION_COUNT:-0}" \
    python3 - "$@" <<'TRUST_EVENT_PY' >/dev/null 2>&1 || true
import importlib.util
import os
import sys

env = os.environ
# Load the module straight from its file path rather than via sys.path.
spec = importlib.util.spec_from_file_location("trust_metrics", env["_TM_MOD_PATH"])
tm = importlib.util.module_from_spec(spec)
spec.loader.exec_module(tm)
# Only argv entries containing "=" become fields; split on the first "="
# so values may themselves contain "=". A repeated key keeps the last value.
fields = dict(arg.split("=", 1) for arg in sys.argv[1:] if "=" in arg)
tm.record_trust_event(
    env["_TM_LOKI_DIR"],
    env["_TM_EVENT_TYPE"],
    run_id=env.get("_TM_RUN_ID", "") or None,
    iteration=env.get("_TM_ITERATION", "0"),
    **fields,
)
TRUST_EVENT_PY
}
|
|
1316
|
+
|
|
1229
1317
|
# v7.0.2: Bash helper to emit a managed-agents event to the dashboard's
|
|
1230
1318
|
# managed event log (.loki/managed/events.ndjson). Mirrors the Python
|
|
1231
1319
|
# emit_managed_event helper so bash callers can land events in the same
|
|
@@ -2916,7 +3004,7 @@ spawn_worktree_session() {
|
|
|
2916
3004
|
>> "$log_file" 2>&1 || _wt_exit=$?
|
|
2917
3005
|
;;
|
|
2918
3006
|
codex)
|
|
2919
|
-
codex exec --full-auto \
|
|
3007
|
+
codex exec --full-auto --skip-git-repo-check \
|
|
2920
3008
|
"Loki Mode: $task_prompt. Read .loki/CONTINUITY.md for context." \
|
|
2921
3009
|
>> "$log_file" 2>&1 || _wt_exit=$?
|
|
2922
3010
|
;;
|
|
@@ -3117,7 +3205,7 @@ Output ONLY the resolved file content with no conflict markers. No explanations.
|
|
|
3117
3205
|
resolution=$(claude --dangerously-skip-permissions -p "$conflict_prompt" --output-format text 2>/dev/null)
|
|
3118
3206
|
;;
|
|
3119
3207
|
codex)
|
|
3120
|
-
resolution=$(codex exec --full-auto "$conflict_prompt" 2>/dev/null)
|
|
3208
|
+
resolution=$(codex exec --full-auto --skip-git-repo-check "$conflict_prompt" 2>/dev/null)
|
|
3121
3209
|
;;
|
|
3122
3210
|
cline)
|
|
3123
3211
|
resolution=$(invoke_cline_capture "$conflict_prompt" 2>/dev/null)
|
|
@@ -6551,6 +6639,13 @@ print(counts[gate_name])
|
|
|
6551
6639
|
loki_crash_friction "gate_failure" "gate=${gate_name} consecutive=${count}" >/dev/null 2>&1 || true
|
|
6552
6640
|
fi
|
|
6553
6641
|
|
|
6642
|
+
# Trust-metrics: append a durable per-failure record so the gate-failure
|
|
6643
|
+
# distribution survives clear_gate_failure (which resets the running
|
|
6644
|
+
# counter). CRITICAL: this function's stdout IS its return value, so the
|
|
6645
|
+
# write is fully stdout-suppressed and best-effort; it cannot change the
|
|
6646
|
+
# echoed count or any gate behavior.
|
|
6647
|
+
record_trust_event_bash "gate_failure" "gate=${gate_name}" "consecutive=${count}" >/dev/null 2>&1 || true
|
|
6648
|
+
|
|
6554
6649
|
echo "$count"
|
|
6555
6650
|
}
|
|
6556
6651
|
|
|
@@ -7500,7 +7595,7 @@ BUILD_PROMPT
|
|
|
7500
7595
|
--output-format text > "$review_output" 2>/dev/null
|
|
7501
7596
|
;;
|
|
7502
7597
|
codex)
|
|
7503
|
-
codex exec --full-auto "$prompt_text" \
|
|
7598
|
+
codex exec --full-auto --skip-git-repo-check "$prompt_text" \
|
|
7504
7599
|
> "$review_output" 2>/dev/null
|
|
7505
7600
|
;;
|
|
7506
7601
|
cline)
|
|
@@ -7715,7 +7810,7 @@ ADVERSARIAL_EOF
|
|
|
7715
7810
|
;;
|
|
7716
7811
|
codex)
|
|
7717
7812
|
if command -v codex &>/dev/null; then
|
|
7718
|
-
codex exec --full-auto "$adversarial_prompt" \
|
|
7813
|
+
codex exec --full-auto --skip-git-repo-check "$adversarial_prompt" \
|
|
7719
7814
|
> "$result_file" 2>/dev/null || true
|
|
7720
7815
|
fi
|
|
7721
7816
|
;;
|
|
@@ -11900,6 +11995,19 @@ run_autonomous() {
|
|
|
11900
11995
|
_LOKI_RUN_START_SHA="$(cat "$_start_sha_file" 2>/dev/null || echo "")"
|
|
11901
11996
|
export _LOKI_RUN_START_SHA
|
|
11902
11997
|
|
|
11998
|
+
# Trust-metrics instrumentation marker: record one run_start event per
|
|
11999
|
+
# fresh run so the trust-metrics denominator counts ONLY instrumented runs.
|
|
12000
|
+
# This is what lets the aggregator distinguish "0 blocks measured" from
|
|
12001
|
+
# "this run predates instrumentation" (the central honesty rule). Additive,
|
|
12002
|
+
# best-effort, stdout-silent; never affects control flow. Mint a fresh
|
|
12003
|
+
# per-run id here and export it so every later event in this run shares it
|
|
12004
|
+
# (LOKI_SESSION_ID is absent on the `loki start` path).
|
|
12005
|
+
if [ "${ITERATION_COUNT:-0}" -eq 0 ]; then
|
|
12006
|
+
LOKI_TRUST_RUN_ID="$(_loki_trust_run_id --new)"
|
|
12007
|
+
export LOKI_TRUST_RUN_ID
|
|
12008
|
+
record_trust_event_bash "run_start" "start_sha=${_LOKI_RUN_START_SHA:-}" 2>/dev/null || true
|
|
12009
|
+
fi
|
|
12010
|
+
|
|
11903
12011
|
# Notify dashboard of active project directory (for AI Chat cross-directory usage)
|
|
11904
12012
|
if command -v curl &>/dev/null; then
|
|
11905
12013
|
local project_cwd
|
|
@@ -12586,7 +12694,7 @@ if __name__ == "__main__":
|
|
|
12586
12694
|
# Uses dynamic tier from RARV phase (tier_param already set above)
|
|
12587
12695
|
{ LOKI_CODEX_REASONING_EFFORT="$tier_param" \
|
|
12588
12696
|
CODEX_MODEL_REASONING_EFFORT="$tier_param" \
|
|
12589
|
-
codex exec --full-auto \
|
|
12697
|
+
codex exec --full-auto --skip-git-repo-check \
|
|
12590
12698
|
"$prompt" 2>&1 | tee -a "$log_file" "$agent_log" "$iter_output"; \
|
|
12591
12699
|
} && exit_code=0 || exit_code=$?
|
|
12592
12700
|
;;
|