npm - loki-mode - Versions diffs - 7.40.0 → 7.41.1 - Mend

loki-mode 7.40.0 → 7.41.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/SKILL.md +2 -2
package/VERSION +1 -1
package/autonomy/app-runner.sh +138 -3
package/autonomy/completion-council.sh +14 -2
package/autonomy/council-v2.sh +10 -1
package/autonomy/grill.sh +9 -1
package/autonomy/lib/claude-flags.sh +321 -0
package/autonomy/lib/voter-agents.sh +7 -1
package/autonomy/loki +70 -6
package/autonomy/run.sh +418 -16
package/dashboard/__init__.py +1 -1
package/dashboard/server.py +95 -2
package/dashboard/static/index.html +58 -32
package/docs/INSTALLATION.md +15 -1
package/loki-ts/dist/loki.js +2 -2
package/mcp/__init__.py +1 -1
package/package.json +1 -1
package/plugins/loki-mode/.claude-plugin/plugin.json +1 -1
package/skills/quality-gates.md +70 -0

package/SKILL.md CHANGED Viewed

@@ -3,7 +3,7 @@ name: loki-mode
 description: Autonomous spec-driven build system with a built-in trust layer. It does not call work done until it is verified (RARV-C closure loop, 11 quality gates, completion council, verified-completion evidence gate). Triggers on "Loki Mode". Takes a spec (PRD, GitHub issue, OpenAPI doc, etc.) to deployed product with minimal human intervention. Provider-agnostic. Requires --dangerously-skip-permissions flag.
 ---
-# Loki Mode v7.40.0
+# Loki Mode v7.41.1
 **You are an autonomous agent. You make decisions. You do not ask questions. You do not stop.**
@@ -398,4 +398,4 @@ See `CHANGELOG.md` entries [7.5.7], [7.5.8], [7.5.13] for the per-fix list and r
 ---
-**v7.40.0 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**
+**v7.41.1 | [Autonomi](https://www.autonomi.dev/) flagship product | ~260 lines core**

package/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 7.40.0
1	+ 7.41.1

package/autonomy/app-runner.sh CHANGED Viewed

@@ -136,6 +136,121 @@ HEALTH_EOF
     mv "$tmp_file" "$_APP_RUNNER_DIR/health.json"
 }
+# Re-derive a detection.json field (type/command) so we can rewrite it after a
+# port reconcile without threading those values through globals. Echoes the raw
+# string value (empty on miss). Mirrors the grep-based read style used by
+# app_runner_status.
+_read_detection_field() {
+    local field="$1"
+    [ -f "$_APP_RUNNER_DIR/detection.json" ] || return 0
+    grep -o "\"${field}\": *\"[^\"]*\"" "$_APP_RUNNER_DIR/detection.json" 2>/dev/null \
+        | head -1 | sed 's/.*"\([^"]*\)"$/\1/'
+}
+# Rewrite detection.json with the reconciled port, preserving type/command.
+_rewrite_detection_port() {
+    local d_type d_command
+    d_type=$(_read_detection_field "type")
+    d_command=$(_read_detection_field "command")
+    [ -n "$d_type" ] || return 0
+    _write_detection "$d_type" "$d_command"
+}
+# Fix #2 (finding #597): reconcile the recorded port with the port the app
+# ACTUALLY bound, using the listen line in app.log as the source of truth. This
+# corrects the dashboard Live Preview even when the app ignores PORT and picks
+# its own port. Bounded poll: returns as soon as a listen line is found, and
+# never runs for docker (compose URLs come from published-port mapping) or when
+# no port was recorded. Poll window: LOKI_APP_PORT_RECONCILE_SECS seconds (default
+# 12) at 0.5s intervals; on no match the recorded port is kept (no regression).
+# Stdout: nothing. On a differing port it mutates _APP_RUNNER_PORT / _APP_RUNNER_URL
+# and rewrites detection.json; the caller's next _write_app_state updates state.json.
+_app_runner_reconcile_port() {
+    [ "$_APP_RUNNER_IS_DOCKER" != true ] || return 0
+    [ -n "$_APP_RUNNER_PORT" ] && [ "$_APP_RUNNER_PORT" -gt 0 ] 2>/dev/null || return 0
+    local log_file="$_APP_RUNNER_DIR/app.log"
+    # Fast path: if the recorded port already serves HTTP, the app honored our
+    # chosen port (fix #1 worked) or otherwise bound it -- nothing to reconcile,
+    # and we avoid the poll latency entirely. Covers quiet-but-serving apps that
+    # never log a recognizable listen line.
+    if command -v curl >/dev/null 2>&1 && \
+       curl -sf -o /dev/null -m 2 "http://localhost:${_APP_RUNNER_PORT}/" 2>/dev/null; then
+        return 0
+    fi
+    local max_secs="${LOKI_APP_PORT_RECONCILE_SECS:-12}"
+    [[ "$max_secs" =~ ^[0-9]+$ ]] || max_secs=12
+    local max_iter=$(( max_secs * 2 ))
+    [ "$max_iter" -gt 0 ] || max_iter=1
+    local real_port="" iter=0
+    while [ "$iter" -lt "$max_iter" ]; do
+        if [ -f "$log_file" ]; then
+            real_port=$(_parse_listen_port "$log_file")
+            [ -n "$real_port" ] && break
+        fi
+        # Stop early if the process already died (failed start): nothing to wait for.
+        if [ -n "$_APP_RUNNER_PID" ] && ! kill -0 "$_APP_RUNNER_PID" 2>/dev/null; then
+            break
+        fi
+        sleep 0.5
+        iter=$(( iter + 1 ))
+    done
+    [ -n "$real_port" ] || return 0
+    if [ "$real_port" != "$_APP_RUNNER_PORT" ]; then
+        log_info "App Runner: reconciled port $_APP_RUNNER_PORT -> $real_port (from app.log listen line)"
+        _APP_RUNNER_PORT="$real_port"
+        _APP_RUNNER_URL="http://localhost:${real_port}"
+        _rewrite_detection_port
+    fi
+    return 0
+}
+# Parse the actual bound port from an app log file. Scans known listen-line
+# shapes in priority order and returns the LAST (most recent) plausible port,
+# tolerating ANSI color codes that dev servers emit. Validates 1-65535. Echoes
+# the port or nothing.
+_parse_listen_port() {
+    local file="$1"
+    [ -f "$file" ] || return 0
+    # Strip ANSI SGR sequences (\e[...m) so color-wrapped URLs still match.
+    local clean
+    clean=$(sed -E $'s/\x1b\\[[0-9;]*m//g' "$file" 2>/dev/null) || clean=$(cat "$file" 2>/dev/null)
+    [ -n "$clean" ] || return 0
+    local candidate=""
+    # 1) Explicit URL with a port: http://host:PORT  (most reliable).
+    candidate=$(printf '%s\n' "$clean" \
+        | grep -oiE 'https?://[a-z0-9.\-]+:[0-9]{1,5}' \
+        | grep -oE ':[0-9]{1,5}' | tr -d ':' | tail -1)
+    # 2) A number anchored to the literal word "port": "port 8080", "port=3000",
+    #    "port: 5000". This runs BEFORE the bare host:port scan so a clock-style
+    #    timestamp on the same line (e.g. "12:30:45 ... port 8080") cannot win.
+    if [ -z "$candidate" ]; then
+        candidate=$(printf '%s\n' "$clean" \
+            | grep -ioE 'port[ =:]+[0-9]{1,5}' \
+            | grep -oE '[0-9]{1,5}' | tail -1)
+    fi
+    # 3) Keyword listen lines with a real host token before the colon:
+    #    "localhost:5173", "0.0.0.0:8080", "127.0.0.1:3000". The host match must
+    #    START at a letter or a dot, which excludes bare "HH:MM" timestamps (all
+    #    digits). NOTE: a letter-led stamp such as "T12:30:45" would still match.
+    if [ -z "$candidate" ]; then
+        candidate=$(printf '%s\n' "$clean" \
+            | grep -iE 'listen|running on|ready|started|serving|server' \
+            | grep -oiE '[a-z.][a-z0-9.\-]*:[0-9]{1,5}' \
+            | grep -oE ':[0-9]{1,5}' | tr -d ':' | tail -1)
+    fi
+    [ -n "$candidate" ] || return 0
+    # Validate range 1-65535.
+    if [ "$candidate" -ge 1 ] 2>/dev/null && [ "$candidate" -le 65535 ] 2>/dev/null; then
+        printf '%s\n' "$candidate"
+    fi
+}
 # Rotate app.log if it exceeds max lines
 _rotate_app_log() {
     local log_file="$_APP_RUNNER_DIR/app.log"
@@ -606,6 +721,21 @@ app_runner_start() {
     log_step "App Runner: starting application ($_APP_RUNNER_METHOD on port $_APP_RUNNER_PORT)..."
     _rotate_app_log
+    # Fix #1 (finding #597): pass Loki's chosen port to the app via the env so the
+    # app honors it instead of binding its own default (e.g. a Node app reading
+    # `process.env.PORT || 4000` would otherwise bind 4000 while Loki recorded the
+    # guessed 3000, leaving the dashboard Live Preview pointed at a dead port).
+    # We export PORT plus the common ecosystem aliases. An app that ignores these
+    # vars is unaffected; an ignored env var is harmless by definition. We do NOT
+    # set HOST/BIND -- changing the bind address can break apps. For docker (which
+    # gets its port via published-port mapping, not the child env) this is a no-op
+    # at the binary boundary, so we only export for the direct-exec path.
+    local _port_env_prefix=""
+    if [ "$_APP_RUNNER_IS_DOCKER" != true ] && \
+       [ -n "$_APP_RUNNER_PORT" ] && [ "$_APP_RUNNER_PORT" -gt 0 ] 2>/dev/null; then
+        _port_env_prefix="export PORT=$_APP_RUNNER_PORT HTTP_PORT=$_APP_RUNNER_PORT SERVER_PORT=$_APP_RUNNER_PORT APP_PORT=$_APP_RUNNER_PORT; "
+    fi
     # Start the process in a new process group
     if command -v setsid >/dev/null 2>&1; then
         _APP_RUNNER_HAS_SETSID=true
@@ -615,7 +745,7 @@ app_runner_start() {
         # Note: $_APP_RUNNER_METHOD has passed _validate_app_command (whitelist).
         # The `--` after `bash -lc` prevents flag injection if the assembled
         # script string ever begins with a `-`.
-        (cd "$dir" && setsid bash -lc -- 'echo $$ > "'"$_pgid_file"'"; exec '"$_APP_RUNNER_METHOD" >> "$_APP_RUNNER_DIR/app.log" 2>&1) &
+        (cd "$dir" && setsid bash -lc -- "$_port_env_prefix"'echo $$ > "'"$_pgid_file"'"; exec '"$_APP_RUNNER_METHOD" >> "$_APP_RUNNER_DIR/app.log" 2>&1) &
         local _subshell_pid=$!
         # Wait briefly for the pgid file to appear, then read the real PGID
         local _pgid_wait=0
@@ -633,7 +763,7 @@ app_runner_start() {
         _APP_RUNNER_HAS_SETSID=false
         # Note: $_APP_RUNNER_METHOD has passed _validate_app_command (whitelist).
         # The `--` after `bash -lc` prevents flag injection.
-        (cd "$dir" && bash -lc -- "$_APP_RUNNER_METHOD" >> "$_APP_RUNNER_DIR/app.log" 2>&1) &
+        (cd "$dir" && bash -lc -- "${_port_env_prefix}exec $_APP_RUNNER_METHOD" >> "$_APP_RUNNER_DIR/app.log" 2>&1) &
         _APP_RUNNER_PID=$!
     fi
     # Register with central PID registry if available
@@ -675,8 +805,13 @@ app_runner_start() {
             return 1
         fi
     elif kill -0 "$_APP_RUNNER_PID" 2>/dev/null; then
+        # Reconcile recorded port with the port the app actually bound (finding
+        # #597), so state.json / detection.json / the preview URL point at the
+        # live port even when the app ignored PORT. Mutates globals before the
+        # state write below. Bounded; no-op when the app honored the chosen port.
+        _app_runner_reconcile_port
         _write_app_state "running"
-        log_info "App Runner: application started (PID: $_APP_RUNNER_PID)"
+        log_info "App Runner: application started (PID: $_APP_RUNNER_PID) on port $_APP_RUNNER_PORT"
         return 0
     else
         log_error "App Runner: application failed to start"

package/autonomy/completion-council.sh CHANGED Viewed

@@ -1775,7 +1775,15 @@ ISSUES: CRITICAL:description (optional, one per line per issue)"
                 if type loki_review_guard_enabled >/dev/null 2>&1 && loki_review_guard_enabled; then
                     _cm_argv+=("--disallowedTools" "$(loki_review_guard_denylist)")
                 fi
-                verdict=$(echo "$prompt" | claude "${_cm_argv[@]}" -p 2>/dev/null | tail -20)
+                # caveman HARD-SUPPRESS (parsed output): this council vote is
+                # parsed for "VOTE: APPROVE|REJECT|CANNOT_VALIDATE". A globally-
+                # active caveman would compress/reword that line and silently flip
+                # the vote to the default REJECT, corrupting completion detection.
+                # Disable caveman UNCONDITIONALLY with CAVEMAN_DEFAULT_MODE=off.
+                # Set inline (not via a helper) so the carve-out holds even when
+                # this file is sourced standalone and the helpers are out of scope.
+                # Inlined on `claude` only (does not cross the pipe). No-op when caveman is absent.
+                verdict=$(echo "$prompt" | env CAVEMAN_DEFAULT_MODE=off claude "${_cm_argv[@]}" -p 2>/dev/null | tail -20)
             fi
             ;;
         codex)
@@ -1870,7 +1878,11 @@ REASON: your reasoning"
                 if type loki_review_guard_enabled >/dev/null 2>&1 && loki_review_guard_enabled; then
                     _co_argv+=("--disallowedTools" "$(loki_review_guard_denylist)")
                 fi
-                verdict=$(echo "$prompt" | claude "${_co_argv[@]}" -p 2>/dev/null | tail -20)
+                # caveman HARD-SUPPRESS (parsed output): the devil's-advocate
+                # (contrarian) vote is parsed for "VOTE:". Disable caveman
+                # unconditionally so compression cannot flip the contrarian vote.
+                # Inlined on `claude` only (does not cross the pipe). No-op when caveman is absent.
+                verdict=$(echo "$prompt" | env CAVEMAN_DEFAULT_MODE=off claude "${_co_argv[@]}" -p 2>/dev/null | tail -20)
             fi
             ;;
         codex)

package/autonomy/council-v2.sh CHANGED Viewed

@@ -276,7 +276,16 @@ Respond ONLY with a valid JSON object. No markdown fencing."
                 if type loki_review_guard_enabled >/dev/null 2>&1 && loki_review_guard_enabled; then
                     _c2_argv+=("--disallowedTools" "$(loki_review_guard_denylist)")
                 fi
-                result=$(echo "$full_prompt" | claude "${_c2_argv[@]}" -p 2>/dev/null || echo '{"verdict":"REJECT","reasoning":"review execution failed","issues":[]}')
+                # caveman HARD-SUPPRESS (parsed output, v7.41.0): this reviewer
+                # verdict is captured and parsed for the JSON "verdict" field. A
+                # globally-active caveman would compress/reword that JSON and
+                # silently flip the verdict to the REJECT fallback. The tree-wide
+                # default-off export in claude-flags.sh already covers this (the
+                # whole subprocess tree inherits CAVEMAN_DEFAULT_MODE=off); the
+                # inline prefix here is belt-and-suspenders so the carve-out is
+                # self-documenting and robust to sourcing order. No-op when caveman
+                # is absent.
+                result=$(echo "$full_prompt" | CAVEMAN_DEFAULT_MODE=off claude "${_c2_argv[@]}" -p 2>/dev/null || echo '{"verdict":"REJECT","reasoning":"review execution failed","issues":[]}')
             else
                 result='{"verdict":"REJECT","reasoning":"reviewer CLI unavailable","issues":[]}'
             fi

package/autonomy/grill.sh CHANGED Viewed

@@ -204,8 +204,16 @@ grill_invoke_provider() {
             if type loki_review_guard_enabled >/dev/null 2>&1 && loki_review_guard_enabled; then
                 _gr_argv+=("--disallowedTools" "$(loki_review_guard_denylist)")
             fi
+            # caveman HARD-SUPPRESS (parsed output, v7.41.0): the grill output is
+            # parsed downstream by loki-grill (and written to report.md as the
+            # hardest spec questions). Treat it as parsed: a globally-active
+            # caveman would compress/reword the questions. The tree-wide default-off
+            # export in claude-flags.sh (sourced at grill.sh:45) already covers
+            # this; the inline `env` prefix is belt-and-suspenders. `env` is used
+            # (not a bare VAR=val prefix) because the call goes through
+            # _grill_with_timeout, where the first token is exec'd as the command.
             out="$(printf '%s' "$prompt" \
-                | _grill_with_timeout "${LOKI_GRILL_TIMEOUT:-180}" claude "${_gr_argv[@]}" -p - 2>/dev/null)"
+                | _grill_with_timeout "${LOKI_GRILL_TIMEOUT:-180}" env CAVEMAN_DEFAULT_MODE=off claude "${_gr_argv[@]}" -p - 2>/dev/null)"
             if [ -z "$out" ]; then
                 _grill_err "provider returned no output (timeout or invocation error)"
                 return $GRILL_EXIT_ERROR

package/autonomy/lib/claude-flags.sh CHANGED Viewed

@@ -482,6 +482,327 @@ loki_workflows_enabled() {
     [ "${LOKI_USE_CLAUDE_WORKFLOWS:-0}" = "1" ]
 }
+# ---------- v7.x caveman output-token compressor gates ----------
+# caveman (https://github.com/JuliusBrussee/caveman, MIT, vendor-less pin) is a
+# Claude Code SKILL + SessionStart hook that instructs the model to compress its
+# OUTPUT TOKENS only (prose style: lite / full / ultra / wenyan), keeping all
+# technical substance. It wraps NO API, needs NO key, has NO network of its own.
+# Once installed it activates GLOBALLY in Claude Code via a SessionStart hook
+# that reads getDefaultMode() (env CAVEMAN_DEFAULT_MODE > repo .caveman config >
+# user config > "full") and, unless the mode is "off", injects its ruleset.
+#
+# THE MOAT RISK (central, why this is wired the way it is): Loki's trust gates
+# parse EXACT model prose -- council "VOTE: APPROVE|REJECT|CANNOT_VALIDATE", code
+# review "^VERDICT:", the legacy completion-promise grep, the evidence-gate
+# sentinels. A globally-active caveman would compress those subcall outputs and
+# silently flip a verdict to the default (REJECT / not-complete), corrupting the
+# loop. This is the same failure class as the --bare OAuth footgun documented at
+# claude-flags.sh:152-161.
+#
+# THE DESIGN (off by construction, not by convention):
+#   - ACTIVATE compression only on FREE-FORM generation (main RARV dev loop +
+#     read-only codebase-analysis): inline `CAVEMAN_DEFAULT_MODE=<level> claude`.
+#   - HARD-SUPPRESS on EVERY parsed-output subcall (council vote, ^VERDICT:
+#     review, adversarial probe, conflict-merge, USAGE regen): inline
+#     `CAVEMAN_DEFAULT_MODE=off claude`. The activate hook then deletes its flag
+#     and emits nothing (verified: `CAVEMAN_DEFAULT_MODE=off node
+#     caveman-activate.js` prints "OK" with no ruleset).
+#
+# Suppression is UNCONDITIONAL and UNGATED (see loki_caveman_suppress_env): it is
+# a harmless no-op env value when caveman is absent and the essential carve-out
+# when caveman is globally present (surface b, or a user's own install) even with
+# LOKI_CAVEMAN=0. NEVER gate suppression on supported/enabled -- that would leave
+# the trust gates unprotected exactly when a user has caveman on but Loki off.
+#
+# Disclosure (honest, no fabricated figures): caveman compresses OUTPUT tokens
+# only, not input/thinking; savings are real but bounded. There is no price API,
+# so we disclose the savings CLASS, never a dollar amount (same posture as the
+# ultrareview/workflows gates).
+# Version pin (vendor-less). Upgrade by bumping this. The upstream installer pins
+# its hook downloads to PINNED_REF = CAVEMAN_REF || 'v1.9.0' (a git tag), and the
+# curl|bash path delegates to `npx -y github:JuliusBrussee/caveman#<ref>`. We
+# default to 1.9.0 and derive the `v`-prefixed tag in the bootstrap helper.
+LOKI_CAVEMAN_VERSION="${LOKI_CAVEMAN_VERSION:-1.9.0}"
+# The compression level for free-form activation. Maps directly to caveman's
+# CAVEMAN_DEFAULT_MODE values: lite | full | ultra | wenyan | wenyan-lite |
+# wenyan-full | wenyan-ultra. Never "off" here -- "off" is the suppression value,
+# not an activation level.
+#
+# v7.x #593 -- INTELLIGENT AUTO-SELECTION (no new user knob): when the user does
+# NOT set LOKI_CAVEMAN_LEVEL explicitly, the level is INFERRED per-invocation from
+# the run's existing RARV-tier signal (see _loki_caveman_infer_level). When the
+# user DOES set it, that value overrides the inference entirely (opt-out escape
+# hatch). Capture set-ness BEFORE the ":-full" default clobbers it -- once the
+# default fills the var, "user set full" and "defaulted to full" are
+# indistinguishable, so the inference would silently never fire. ${var+set} is
+# non-empty only when the var was genuinely set (even to empty). The "full"
+# default is kept so the public var still reads "full" for external readers and
+# so a child re-source re-derives USERSET correctly (unexported default).
+if [ -z "${LOKI_CAVEMAN_LEVEL_USERSET+x}" ]; then
+    LOKI_CAVEMAN_LEVEL_USERSET="${LOKI_CAVEMAN_LEVEL+set}"
+fi
+LOKI_CAVEMAN_LEVEL="${LOKI_CAVEMAN_LEVEL:-full}"
+# ---------- DEFAULT-SUPPRESS: off by construction, tree-wide ----------
+# THE MOAT GUARANTEE (v7.41.0 council fix): instead of hand-enumerating every
+# parsed-output trust-gate subcall and remembering to prefix each with
+# CAVEMAN_DEFAULT_MODE=off (a missed site silently corrupts a verdict -- caveman
+# exits 0 with mangled prose and the `|| REJECT` fallback never fires), we flip
+# the ENTIRE process tree to suppressed at the one module every tree sources.
+#
+# claude-flags.sh is sourced by EVERY tree that can spawn a parsed claude
+# subcall: the run.sh orchestrator (via providers/claude.sh), grill.sh (standalone
+# `loki grill`), lib/voter-agents.sh (Phase C agent-dispatch voters), and the
+# loki standalone review/workflows commands (on-demand). council-v2.sh carries no
+# source of its own but only ever runs inside completion-council.sh, which is in
+# the run.sh tree, so it inherits this export too. Exporting off HERE makes the
+# whole spawned subprocess tree inherit suppression -- caveman's SessionStart
+# hook reads process.env CAVEMAN_DEFAULT_MODE -- closing council-v2.sh,
+# voter-agents.sh, grill.sh, every existing parsed subcall, and any FUTURE one by
+# construction. ACTIVATION on the handful of free-form generation sites overrides
+# this per-invocation (CAVEMAN_DEFAULT_MODE=<level> claude ...).
+#
+# Capture the user's pre-existing global CAVEMAN_DEFAULT_MODE BEFORE we clobber
+# it, so the activation path can respect (never RAISE) a user's lower level (see
+# loki_caveman_activate_env). Guarded on UNSET (not empty): a child process that
+# inherits our exported LOKI_CAVEMAN_USER_MODE="" (user had no global mode) and
+# re-sources this file must NOT recapture the now-exported CAVEMAN_DEFAULT_MODE=off
+# as the user mode. ${var+x} is empty only when var is genuinely unset, so the
+# capture runs exactly once across the whole process tree, never recapturing "off".
+if [ -z "${LOKI_CAVEMAN_USER_MODE+x}" ]; then
+    LOKI_CAVEMAN_USER_MODE="${CAVEMAN_DEFAULT_MODE:-}"
+fi
+export LOKI_CAVEMAN_USER_MODE
+export CAVEMAN_DEFAULT_MODE=off
+# ---------- v7.x #593 intelligent compression-level inference ----------
+# Infer the caveman compression level from the run's existing RARV-tier signal,
+# so the level is DECIDED by inspecting the work rather than asked of the user.
+# No new user input is introduced: the tier already drives effort/model selection
+# (loki_effort_for_tier, get_rarv_tier). On the bash route the tier is read from
+# LOKI_CURRENT_TIER (exported by run_autonomous each iteration); the TS mirror
+# (cavemanActivateEnv) receives the same tier vocabulary (planning|development|
+# fast) via call.tier, so both routes infer identically from the same signal.
+#
+# INFERENCE RULE (deterministic, conservative-for-accuracy):
+#   planning    (Reason phase -- architecture / design / nuanced reasoning) -> lite
+#   development (Act / Reflect -- implementation, the prior default)         -> full
+#   fast        (Verify phase -- testing / validation, more routine)         -> full
+#   unknown / empty tier                                                     -> full
+# The auto ceiling is "full": inference NEVER selects ultra. ultra is reachable
+# only via an explicit LOKI_CAVEMAN_LEVEL override (the opt-out escape hatch), so
+# the autonomous path can never compress hard enough to lose technical nuance.
+# "lite" on planning protects the highest-nuance output (architecture/design);
+# everything else stays at the established "full" default. When the tier is
+# unknown we pick the SAFER (established) "full", never something more aggressive.
+_loki_caveman_infer_level() {
+    local tier="${1:-${LOKI_CURRENT_TIER:-}}"
+    case "$tier" in
+        planning) printf '%s' "lite" ;;
+        *)        printf '%s' "full" ;;
+    esac
+}
+# Rank a caveman mode by compression aggressiveness for the no-raise comparison.
+# Higher number = more aggressive (drops more nuance). "off" is the floor; unknown
+# or empty modes rank as -1 (treated as "no opinion", so they never win a min()).
+# The wenyan-* variants mirror their plain counterparts' aggressiveness.
+_loki_caveman_level_rank() {
+    case "${1:-}" in
+        off)                      printf '0' ;;
+        lite|wenyan-lite)         printf '1' ;;
+        full|wenyan|wenyan-full)  printf '2' ;;
+        ultra|wenyan-ultra)       printf '3' ;;
+        *)                        printf '%s' '-1' ;;
+    esac
+}
+# Caveman config dir resolution mirrors caveman-config.js getConfigDir(): honors
+# CLAUDE_CONFIG_DIR for the flag file location used to detect an existing install.
+_loki_caveman_claude_dir() {
+    printf '%s' "${CLAUDE_CONFIG_DIR:-$HOME/.claude}"
+}
+# True (0) when caveman appears installed. Best-effort, read-only. Checks the
+# resolved Claude config dir for the SessionStart hook the upstream installer
+# writes (hooks/caveman-activate.js, standalone) OR, as a weaker plugin-install
+# signal, a prior-session .caveman-active flag file. Either is taken as installed.
+_loki_caveman_installed() {
+    local dir
+    dir="$(_loki_caveman_claude_dir)"
+    [ -f "$dir/hooks/caveman-activate.js" ] && return 0
+    # Plugin install: the activate hook lives under a plugin root; the flag file
+    # path is stable. Treat a prior-session flag file as a weaker install signal.
+    [ -f "$dir/.caveman-active" ] && return 0
+    return 1
+}
+# Capability gate: can caveman compression be USED on this run? Provider is
+# Claude AND the claude CLI is present AND caveman is installed-or-bootstrappable
+# AND it is not disabled by the LOKI_CAVEMAN knob. Returns 0 when usable, 1
+# otherwise (callers emit an honest message + degrade to an uncompressed run).
+# Mirrors loki_workflows_supported's shape (provider + CLI + not-disabled).
+loki_caveman_supported() {
+    # Provider must be Claude (Tier 1). caveman is Claude-Code-only.
+    [ "${LOKI_PROVIDER:-claude}" = "claude" ] || return 1
+    command -v claude >/dev/null 2>&1 || return 1
+    # Opt-out knob also suppresses the capability (no activation when off).
+    [ "${LOKI_CAVEMAN:-1}" = "0" ] && return 1
+    # Installed now, OR bootstrappable (node + npx present so the pin can install
+    # on demand). Either way activation can take effect this run or the next.
+    if _loki_caveman_installed; then
+        return 0
+    fi
+    command -v node >/dev/null 2>&1 && command -v npx >/dev/null 2>&1 && return 0
+    return 1
+}
+# Activation knob: is caveman compression ENABLED for free-form subcalls?
+# DEFAULT ON (LOKI_CAVEMAN unset or 1). Opt out with LOKI_CAVEMAN=0.
+#
+# CROSS-COUPLING GUARD (moat safety): when LOKI_LEGACY_COMPLETION_MATCH=true the
+# runner detects completion by grepping the MAIN-loop prose for the completion
+# promise (run.sh:9641). Compressing the main loop would mangle that prose and
+# break the legacy detector, so caveman activation is DISABLED whenever the
+# legacy prose-match path is in use. The default completion path (the
+# loki_complete_task MCP tool / COMPLETION_REQUESTED signal file) is immune to
+# compression, so the default config keeps caveman on.
+loki_caveman_enabled() {
+    [ "${LOKI_CAVEMAN:-1}" = "0" ] && return 1
+    [ "${LOKI_LEGACY_COMPLETION_MATCH:-false}" = "true" ] && return 1
+    return 0
+}
+# The activation env VALUE for a free-form subcall: the configured level, or
+# empty when activation is not warranted (caveman unsupported or disabled). The
+# caller inlines it as a per-invocation env prefix (NEVER `export` -- a persisted
+# export would bleed into later parsed subcalls and defeat the carve-out):
+#   _cm_lvl="$(loki_caveman_activate_env)"
+#   if [ -n "$_cm_lvl" ]; then
+#       CAVEMAN_DEFAULT_MODE="$_cm_lvl" claude ...   # free-form only
+#   else
+#       claude ...
+#   fi
+#
+# NO-RAISE (v7.41.0 R2 finding 4): the level returned is the configured Loki
+# level, EXCEPT we never silently RAISE a user who set a LOWER global caveman
+# level. If the user globally chose "lite" (less aggressive, preserves more
+# nuance) we honor "lite" rather than forcing "full" on their free-form output.
+# We only ever lower toward the user's preference, never above it; the activation
+# level itself is the conservative-for-accuracy ceiling. The user's global mode is
+# captured at source time into LOKI_CAVEMAN_USER_MODE before the default-off
+# export clobbers CAVEMAN_DEFAULT_MODE. Unknown / empty user modes (rank -1) are
+# ignored so a malformed value can never accidentally suppress activation.
+loki_caveman_activate_env() {
+    loki_caveman_supported || return 0
+    loki_caveman_enabled   || return 0
+    # #593: the level is the EXPLICIT user value when LOKI_CAVEMAN_LEVEL was set
+    # (override / opt-out escape hatch), else the INFERRED level from the RARV
+    # tier. The no-raise guard below then runs unchanged on this base, so an
+    # explicit level is still lowered toward a user's lower global mode exactly
+    # as before -- "override" means override the inference, not the no-raise.
+    local level
+    if [ -n "${LOKI_CAVEMAN_LEVEL_USERSET:-}" ]; then
+        level="${LOKI_CAVEMAN_LEVEL:-full}"
+    else
+        level="$(_loki_caveman_infer_level)"
+    fi
+    # Respect (never exceed) a user's explicitly-lower global level. A user who
+    # globally set CAVEMAN_DEFAULT_MODE=off opted OUT of compression entirely;
+    # honor that by activating nothing (empty -> bare claude invocation).
+    local user_mode="${LOKI_CAVEMAN_USER_MODE:-}"
+    if [ "$user_mode" = "off" ]; then
+        return 0
+    fi
+    if [ -n "$user_mode" ]; then
+        local user_rank level_rank
+        user_rank="$(_loki_caveman_level_rank "$user_mode")"
+        level_rank="$(_loki_caveman_level_rank "$level")"
+        # Only defer to the user when their mode is a recognized, lower level.
+        if [ "$user_rank" -ge 0 ] && [ "$level_rank" -ge 0 ] && [ "$user_rank" -lt "$level_rank" ]; then
+            level="$user_mode"
+        fi
+    fi
+    printf '%s' "$level"
+}
+# The suppression env VALUE for a parsed-output subcall: ALWAYS "off",
+# UNCONDITIONALLY. Not gated on supported/enabled (see the design note above):
+# it must hard-disable caveman on a trust-gate subcall even when a user has
+# caveman globally on but LOKI_CAVEMAN=0, and it is a harmless no-op env value
+# when caveman is absent. Sites that may run without this file inline the literal
+# "off"; all other parsed-output call sites can share this ONE helper:
+#   CAVEMAN_DEFAULT_MODE="$(loki_caveman_suppress_env)" claude ...
+loki_caveman_suppress_env() {
+    printf '%s' "off"
+}
+# Idempotent on-demand bootstrap of caveman at the pinned version. Best-effort:
+# installs once per machine, caches a marker under .loki/ so repeat runs are a
+# no-op, degrades cleanly (run proceeds UNCOMPRESSED) with an honest stderr line
+# if anything is missing or the upstream installer is unreachable. NEVER blocks
+# or fails the run. Returns 0 if caveman is (now) installed, 1 on clean degrade.
+#
+# Opt out with LOKI_CAVEMAN_AUTO_BOOTSTRAP=0. Only attempts when provider==claude
+# and the LOKI_CAVEMAN knob is on.
+#
+# GLOBAL SIDE EFFECT (disclosed): caveman installs GLOBALLY -- the upstream
+# installer adds a SessionStart hook to ~/.claude/settings.json (or
+# $CLAUDE_CONFIG_DIR) that affects EVERY Claude Code session on this machine, not
+# only Loki runs. This is caveman's only install mode. The bootstrap therefore
+# runs the upstream installer exactly as a user's own `curl|bash` would; we do
+# not author or vendor any caveman file. The one-time stderr line below names
+# this so the operator is never surprised.
+#
+# HARDENING: the npx call is forced non-interactive (--non-interactive, plus
+# </dev/null so no stdin read can ever block) and time-bounded (timeout when
+# available) so a stalled network or an unexpected prompt can never hang a user's
+# first `loki start`. caveman's installer is already auto-non-interactive without
+# a TTY, but we belt-and-suspenders it.
+loki_caveman_bootstrap() {
+    [ "${LOKI_CAVEMAN:-1}" = "0" ] && return 1
+    [ "${LOKI_CAVEMAN_AUTO_BOOTSTRAP:-1}" = "0" ] && return 1
+    [ "${LOKI_PROVIDER:-claude}" = "claude" ] || return 1
+    # Already installed -> nothing to do.
+    if _loki_caveman_installed; then
+        return 0
+    fi
+    local ver="${LOKI_CAVEMAN_VERSION:-1.9.0}"
+    local marker_dir=".loki/state"
+    local marker="$marker_dir/caveman-bootstrap-${ver}.done"
+    # Cached attempt marker: do not re-attempt a failed install over and over
+    # within the same project tree (idempotent one-shot per pinned version).
+    if [ -f "$marker" ]; then
+        _loki_caveman_installed && return 0 || return 1
+    fi
+    if ! command -v node >/dev/null 2>&1 || ! command -v npx >/dev/null 2>&1; then
+        printf '%s\n' "[caveman] node>=18 + npx required to bootstrap; skipping (run proceeds uncompressed). Install Node or set LOKI_CAVEMAN=0 to silence." >&2
+        mkdir -p "$marker_dir" 2>/dev/null && : > "$marker" 2>/dev/null || true
+        return 1
+    fi
+    printf '%s\n' "[caveman] bootstrapping output-token compressor v${ver} (one-time, pinned). NOTE: caveman installs GLOBALLY (a Claude Code SessionStart hook in ~/.claude affecting every Claude Code session). Loki applies it only to free-form generation, NEVER to trust-gate subcalls. Opt out: LOKI_CAVEMAN=0." >&2
+    # Pin via the git tag (v-prefixed) on the npx ref AND CAVEMAN_REF so the
+    # downloaded hooks match the pinned release. Default install (no --all) wires
+    # the Claude Code hook for the detected `claude` CLI. A timeout backstops a
+    # network stall; </dev/null guarantees no interactive stdin read blocks.
+    local _cm_runner="npx"
+    if command -v timeout >/dev/null 2>&1; then
+        _cm_runner="timeout 120 npx"
+    fi
+    if CAVEMAN_REF="v${ver}" $_cm_runner -y "github:JuliusBrussee/caveman#v${ver}" -- --non-interactive >/dev/null 2>&1 </dev/null; then
+        mkdir -p "$marker_dir" 2>/dev/null && : > "$marker" 2>/dev/null || true
+        if _loki_caveman_installed; then
+            printf '%s\n' "[caveman] installed v${ver}." >&2
+            return 0
+        fi
+    fi
+    printf '%s\n' "[caveman] bootstrap unavailable (upstream unreachable, timed out, or install failed); run proceeds uncompressed." >&2
+    mkdir -p "$marker_dir" 2>/dev/null && : > "$marker" 2>/dev/null || true
+    return 1
+}
 # ---------------------------------------------------------------------------
 # Session-continuity Phase 2 (GitHub #165) -- LOKI_RESUME_SESSION recovery resume
 #

package/autonomy/lib/voter-agents.sh CHANGED Viewed

@@ -250,7 +250,13 @@ loki_council_dispatch_agents() {
     local rc=0
     local stderr_log="$COUNCIL_STATE_DIR/votes/dispatch-stderr-${iteration}.log"
     mkdir -p "$(dirname "$stderr_log")" 2>/dev/null || true
-    response=$(claude --dangerously-skip-permissions \
+    # caveman HARD-SUPPRESS (parsed output, v7.41.0): the response is parsed for
+    # findings[].vote against the JSON Schema. A globally-active caveman would
+    # compress/reword it and break the schema match or flip a vote. The tree-wide
+    # default-off export in claude-flags.sh (sourced above) already covers this;
+    # the inline prefix is belt-and-suspenders, self-documenting, and a no-op when
+    # caveman is absent.
+    response=$(CAVEMAN_DEFAULT_MODE=off claude --dangerously-skip-permissions \
                       -p "$prompt" \
                       --agents "$agents_json" \
                       --json-schema "$schema_path" 2>"$stderr_log") || rc=$?