npm - loki-mode - Versions diffs - 7.31.0 → 7.32.0 - Mend

loki-mode 7.31.0 → 7.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/autonomy/mcp-launch.sh CHANGED Viewed

@@ -113,15 +113,22 @@ _ml_python() {
 # `--check-sdk` probe, which runs the exact loader the server uses and exits 0
 # only when FastMCP loaded.
 #
-# Critical: we set PYTHONPATH to the install root and DO NOT cd into it, so the
-# probe exercises the SAME module resolution as the real launch (which preserves
-# the user's cwd). The redirect of stdin from /dev/null is insurance: if the
-# pip SDK's own `mcp.server` were ever reached, its stub starts a stdio receive
-# loop; the EOF makes it exit instead of hanging.
+# Critical: we probe with the SAME FILE-EXEC form the launch uses
+# (`"$root/mcp/server.py"`, NOT `-m mcp.server`), with PYTHONPATH set to the
+# install root and WITHOUT cd-ing into it, so the probe exercises byte-identical
+# module resolution to the real launch (which preserves the user's cwd). This
+# matters because `-m mcp.server` puts the user's cwd at sys.path[0] AHEAD of
+# PYTHONPATH=$root, so a cwd that happens to contain a regular `mcp/` python
+# package would shadow Loki's server during the probe (false SDK-missing) while
+# the file-exec launch -- immune to cwd shadowing -- would succeed. Probing by
+# file path keeps probe and launch resolving the IDENTICAL module. The redirect
+# of stdin from /dev/null is insurance: if the pip SDK's own `mcp.server` were
+# ever reached, its stub starts a stdio receive loop; the EOF makes it exit
+# instead of hanging.
 _ml_sdk_importable() {
     local py="$1" root="$2"
     PYTHONPATH="$root${PYTHONPATH:+:$PYTHONPATH}" \
-        "$py" -m mcp.server --check-sdk </dev/null >/dev/null 2>&1
+        "$py" "$root/mcp/server.py" --check-sdk </dev/null >/dev/null 2>&1
 }
 # _ml_print_manual <root> <venv>: print the honest manual install commands.
@@ -135,7 +142,7 @@ _ml_print_manual() {
     printf 'Install the MCP server dependencies manually:\n' >&2
     printf "  python3 -m venv '%s'\n" "$venv" >&2
     printf "  '%s/bin/pip' install -r '%s/mcp/requirements.txt'\n" "$venv" "$root" >&2
-    printf "  PYTHONPATH='%s' '%s/bin/python' -m mcp.server\n" "$root" "$venv" >&2
+    printf "  PYTHONPATH='%s' '%s/bin/python' '%s/mcp/server.py'\n" "$root" "$venv" "$root" >&2
 }
 _ml_help() {
@@ -189,7 +196,8 @@ EOF
 # mcp_launch_main: dispatcher invoked by cmd_mcp() (autonomy/loki) or directly.
 mcp_launch_main() {
     # Split argv into launcher-owned flags (consumed here) and server argv
-    # (forwarded verbatim to `python -m mcp.server`). The server's argparse only
+    # (forwarded verbatim to the file-exec launch `python "$root/mcp/server.py"`).
+    # The server's argparse only
     # accepts --transport/--port/--check-sdk; forwarding a launcher flag like
     # --yes would make it abort with exit 2, so launcher flags MUST be stripped.
     # A bare `--` ends launcher parsing: everything after it is forwarded as-is
@@ -255,20 +263,23 @@ mcp_launch_main() {
     fi
     # 3. If the venv already has the SDK, use it directly. The server is launched
-    #    with PYTHONPATH=$root (NOT by cd-ing) so the user's cwd is preserved for
+    #    by FILE PATH ($root/mcp/server.py) rather than `-m mcp.server`, with
+    #    PYTHONPATH=$root (NOT by cd-ing) so the user's cwd is preserved for
     #    .loki resolution; see _ml_sdk_importable for why.
-    #    Known narrow residual: if the user's cwd itself contains a Python
-    #    package literally named mcp/ with a server submodule, python -m puts
-    #    the cwd ahead of PYTHONPATH and that package wins. Essentially never
-    #    true for real projects; documented rather than fought.
+    #    Running the file directly avoids the runpy RuntimeWarning that `-m`
+    #    emits (the local mcp/ package is imported during SDK-namespace setup
+    #    before runpy executes mcp.server). server.py uses only absolute imports
+    #    (e.g. `from mcp._sdk_loader import ...`), which resolve via PYTHONPATH=$root
+    #    under file execution. File-exec also removes the old narrow cwd-shadowing
+    #    residual: an explicit path can never be shadowed by a cwd `mcp/` package.
     if [ -x "$venv_py" ] && _ml_sdk_importable "$venv_py" "$root"; then
-        exec env PYTHONPATH="$root${PYTHONPATH:+:$PYTHONPATH}" "$venv_py" -m mcp.server "$@"
+        exec env PYTHONPATH="$root${PYTHONPATH:+:$PYTHONPATH}" "$venv_py" "$root/mcp/server.py" "$@"
     fi
     # 4. If the BASE python already has the SDK (e.g. user pip-installed it),
     #    use it -- no venv needed.
     if _ml_sdk_importable "$base_py" "$root"; then
-        exec env PYTHONPATH="$root${PYTHONPATH:+:$PYTHONPATH}" "$base_py" -m mcp.server "$@"
+        exec env PYTHONPATH="$root${PYTHONPATH:+:$PYTHONPATH}" "$base_py" "$root/mcp/server.py" "$@"
     fi
     # 5. SDK missing. Decide whether we may bootstrap.
@@ -374,7 +385,7 @@ mcp_launch_main() {
         return 2
     fi
     printf "%sMCP dependencies ready. Launching server over stdio ...%s\n" "$_ML_BOLD" "$_ML_NC" >&2
-    exec env PYTHONPATH="$root${PYTHONPATH:+:$PYTHONPATH}" "$venv_py" -m mcp.server "$@"
+    exec env PYTHONPATH="$root${PYTHONPATH:+:$PYTHONPATH}" "$venv_py" "$root/mcp/server.py" "$@"
 }
 # Executed directly (tests, manual): run the dispatcher.

package/autonomy/run.sh CHANGED Viewed

@@ -12328,13 +12328,30 @@ except Exception as exc:
             # helpers (which expect tier names) resolve correctly. Unknown
             # model strings are passed through as-is; provider loaders fall
             # back to a sane default.
-            case "${LOKI_SESSION_MODEL:-sonnet}" in
+            #
+            # Normalize case + surrounding whitespace BEFORE the match so
+            # 'OPUS' and ' opus ' resolve identically to 'opus'. We do NOT use
+            # loki_normalize_model_alias here: that helper is the narrow
+            # OVERRIDE-file allowlist (haiku|sonnet|opus|fable) and would strip
+            # the documented tier-name pins (planning|development|fast) to
+            # empty, collapsing them onto the default tier. The session pin
+            # legitimately accepts tier names (skills/model-selection.md), and
+            # the estimator + dashboard mirror this exact tier route, so the
+            # canonical session-pin rule is trim+lowercase WITHOUT the alias
+            # allowlist. Interior whitespace is preserved (so 'fab le' stays a
+            # junk value that falls through the '*' default arm), matching the
+            # estimator/dashboard ports.
+            local _session_pin="${LOKI_SESSION_MODEL:-sonnet}"
+            _session_pin="${_session_pin#"${_session_pin%%[![:space:]]*}"}"
+            _session_pin="${_session_pin%"${_session_pin##*[![:space:]]}"}"
+            _session_pin="$(printf '%s' "$_session_pin" | tr '[:upper:]' '[:lower:]')"
+            case "$_session_pin" in
                 opus)   CURRENT_TIER="planning" ;;
                 sonnet) CURRENT_TIER="development" ;;
                 haiku)  CURRENT_TIER="fast" ;;
                 fable)  CURRENT_TIER="fable" ;;
-                planning|development|fast) CURRENT_TIER="${LOKI_SESSION_MODEL}" ;;
-                *)      CURRENT_TIER="${LOKI_SESSION_MODEL}" ;;
+                planning|development|fast) CURRENT_TIER="$_session_pin" ;;
+                *)      CURRENT_TIER="$_session_pin" ;;
             esac
         fi
         # Architect opt-in (LOKI_FABLE_ARCHITECT=1): route ONLY the first

package/bin/loki CHANGED Viewed

@@ -65,6 +65,21 @@ elif [ "${BUN_FROM_SOURCE:-0}" = "1" ] || [ "${BUN_FROM_SOURCE:-}" = "true" ]; t
     fi
 elif [ -f "$REPO_ROOT/loki-ts/dist/loki.js" ]; then
     BUN_CLI="$REPO_ROOT/loki-ts/dist/loki.js"
+    # Stale-dist freshness guard. dist/loki.js is gitignored (loki-ts/.gitignore)
+    # and rebuilt by package.json's prepack and the release Docker build, so on
+    # released channels (npm/Docker/brew) src/cli.ts is absent and this guard is
+    # a no-op -- dist is always current there. On a DEV machine / worktree the
+    # gitignored dist can predate the current dispatcher: e.g. a new shim->Bun
+    # route (`report kpis`) added in src that the old dist bundle does not know,
+    # which would make the canonical form fail with "Unknown command" while a
+    # deprecated alias the old dist still knows silently works -- the exact
+    # deprecation inversion the report-kpis route exists to prevent. So when src
+    # exists AND is newer than dist (`-nt`, available on bash 3.2), prefer the src
+    # form so dev runs never execute a stale dispatcher. Released channels have no
+    # src, so they keep using dist unchanged.
+    if [ -f "$REPO_ROOT/loki-ts/src/cli.ts" ] && [ "$REPO_ROOT/loki-ts/src/cli.ts" -nt "$REPO_ROOT/loki-ts/dist/loki.js" ]; then
+        BUN_CLI="$REPO_ROOT/loki-ts/src/cli.ts"
+    fi
 elif [ -f "$REPO_ROOT/loki-ts/src/cli.ts" ]; then
     BUN_CLI="$REPO_ROOT/loki-ts/src/cli.ts"
 else
@@ -131,6 +146,43 @@ if [ "${1:-}" = "trust" ]; then
     done
 fi
+# CLI consolidation (Phase B): `report kpis` is the canonical form of the
+# Bun-only KPI snapshot. The `report` noun is otherwise bash-owned (every other
+# subcommand -- session/metrics/cost/export/share/dogfood -- routes to bash
+# cmd_report), so it is NOT in the Bun allowlist below. But kpis has no bash
+# implementation (it reuses the canonical cost arithmetic in runner/budget.ts),
+# so the canonical `report kpis` must reach the Bun handler -- otherwise the
+# canonical form would print the honest "requires Bun" message on a Bun machine
+# while the deprecated `kpis` alias actually worked, inverting the deprecation.
+# Route `report kpis` to Bun when `kpis` is the report SUBCOMMAND, i.e. the FIRST
+# non-flag token after `report`. This satisfies `report kpis`, `report kpis
+# --json`, and `report --json kpis` (the flag-anywhere orderings the trust-detail
+# precedent established), while NOT hijacking `kpis` when it appears as a
+# positional VALUE of a different report subcommand. `report export json kpis`
+# (kpis = the export output filename) must keep working exactly as on main
+# (v7.31.0): exit 0, file `kpis` created. So we scan past leading flags, take the
+# first real token, and only route to Bun if it is literally `kpis`. Fire the
+# same cli_command telemetry the Bun case-arm below fires (command=report), so a
+# Bun-routed `report kpis` is not invisible to usage analytics (the v7.8.2 parity
+# the case-arm comment documents). Backgrounded, FD-detached, opt-out honored by
+# loki_telemetry itself.
+if [ "${1:-}" = "report" ]; then
+    _report_first_sub=""
+    for _report_arg in "${@:2}"; do
+        case "$_report_arg" in
+            -*) continue ;;
+            *) _report_first_sub="$_report_arg"; break ;;
+        esac
+    done
+    if [ "$_report_first_sub" = "kpis" ]; then
+        if command -v curl &>/dev/null && [ -f "$REPO_ROOT/autonomy/telemetry.sh" ]; then
+            ( SCRIPT_DIR="$REPO_ROOT/autonomy"; source "$SCRIPT_DIR/telemetry.sh" 2>/dev/null && loki_telemetry "cli_command" "command=${1:-}" 2>/dev/null ) >/dev/null 2>&1 </dev/null &
+            disown 2>/dev/null || true
+        fi
+        exec bun "$BUN_CLI" "$@"
+    fi
+fi
 # Commands ported in Phase 2 -- route to Bun. Everything else goes to bash.
 # Two-token routes (provider show/list, memory list/index) match on the first
 # token only; the Bun dispatcher handles subcommand routing internally.

package/dashboard/__init__.py CHANGED Viewed

@@ -7,7 +7,7 @@ Modules:
     control: Session control API (start/stop/pause/resume)
 """
-__version__ = "7.31.0"
+__version__ = "7.32.0"
 # Expose the control app for easy import
 try:

package/dashboard/server.py CHANGED Viewed

@@ -2233,6 +2233,29 @@ def _normalize_session_model(raw: str | None) -> str:
     return val if val in _SESSION_MODEL_ALLOWLIST else ""
+# Session-pin allowlist is BROADER than the override-file allowlist above.
+# run.sh's session-pin case (run.sh:12331) accepts the four model aliases AND
+# the three raw tier names (planning|development|fast) -- documented at
+# skills/model-selection.md:8. The OVERRIDE file / POST path keeps the narrow
+# _SESSION_MODEL_ALLOWLIST because that value is fed straight to `claude
+# --model`, where tier names are not valid. The session pin is a tier route, so
+# tier names ARE valid pins.
+_SESSION_PIN_ALLOWLIST = _SESSION_MODEL_ALLOWLIST + ("planning", "development", "fast")
+def _normalize_session_pin(raw: str | None) -> str:
+    """Normalize a LOKI_SESSION_MODEL pin value (aliases + raw tier names).
+    Mirrors run.sh's session-pin case: trim + lowercase, accept the four model
+    aliases and the three tier names. Interior whitespace is preserved (so
+    "fab le" stays junk and falls through to the default tier, exactly like the
+    runner's "*" arm). Use this for the session-pin (no-override) derivation;
+    use _normalize_session_model for the override-file / POST path.
+    """
+    val = (raw or "").strip().lower()
+    return val if val in _SESSION_PIN_ALLOWLIST else ""
 # Provider-config model resolution mirror.
 #
 # SYNC: This is a byte-faithful python port of the claude provider's tier->model
@@ -2268,6 +2291,17 @@ def _provider_model_development() -> str:
     )
+def _provider_model_planning() -> str:
+    # claude.sh:65 -> LOKI_CLAUDE_MODEL_PLANNING > LOKI_MODEL_PLANNING > opus.
+    # CLAUDE_DEFAULT_PLANNING is always opus (LOKI_ALLOW_HAIKU lowers only the
+    # development and fast defaults, not planning).
+    return (
+        os.environ.get("LOKI_CLAUDE_MODEL_PLANNING")
+        or os.environ.get("LOKI_MODEL_PLANNING")
+        or "opus"
+    )
 def _clamp_to_max_tier(alias: str) -> str:
     """Apply the operator LOKI_MAX_TIER ceiling to a model alias.
@@ -2297,44 +2331,102 @@ def _clamp_to_max_tier(alias: str) -> str:
     return alias
+def _resolve_session_pin(alias: str) -> str:
+    """Resolve a session-pin alias the way the runner's NO-OVERRIDE path does.
+    The runner does NOT feed a session pin straight to --model. It maps the alias
+    to an abstract TIER (run.sh:12331 -- opus->planning, sonnet->development,
+    haiku->fast, fable->fable) and resolves that tier through
+    resolve_model_for_tier (claude.sh:353), then applies
+    loki_apply_max_tier_clamp(model, REAL_tier). This DIFFERS from
+    _clamp_to_max_tier (the override-path clamp): a 'sonnet' SESSION pin
+    dispatches OPUS (development tier -> PROVIDER_MODEL_DEVELOPMENT=opus on stock
+    config), whereas a 'sonnet' OVERRIDE file dispatches sonnet (fed straight to
+    --model). Use this for the no-override `default`/`effective` derivation so the
+    dashboard reports the model the run actually dispatches on the default path.
+    SYNC: byte-faithful with run.sh's session-pin case + claude.sh
+    resolve_model_for_tier + loki_apply_max_tier_clamp, and with the estimator's
+    _resolve_session_pin in autonomy/loki. Locked by the session-pin parity matrix
+    in tests/test-model-override.sh.
+    """
+    pin_tier = {
+        "opus": "planning",
+        "sonnet": "development",
+        "haiku": "fast",
+        "fable": "fable",
+        # Raw tier-name pins (run.sh:12336 passthrough arm) map to their own
+        # tier, NOT through the alias table. pin=fast -> fast tier ->
+        # PROVIDER_MODEL_FAST, matching the runner's dispatch instead of
+        # collapsing onto development.
+        "planning": "planning",
+        "development": "development",
+        "fast": "fast",
+    }.get((alias or "").strip().lower(), "development")
+    if pin_tier == "planning":
+        model = _provider_model_planning()
+    elif pin_tier == "fast":
+        model = _provider_model_fast()
+    elif pin_tier == "fable":
+        model = "fable"
+    else:  # development (and the unknown-alias '*' fallthrough)
+        model = _provider_model_development()
+    max_tier = (os.environ.get("LOKI_MAX_TIER") or "").strip().lower()
+    if not max_tier:
+        return model
+    if max_tier == "haiku":
+        return _provider_model_fast()
+    if max_tier == "sonnet":
+        # claude.sh sonnet-cap downgrades planning/fable tiers (or a fable model)
+        # to PROVIDER_MODEL_DEVELOPMENT; development/fast pass through.
+        if pin_tier in ("planning", "fable") or model == "fable":
+            return _provider_model_development()
+        return model
+    if max_tier == "opus":
+        return "opus" if model == "fable" else model
+    return model
 @app.get("/api/session/model", dependencies=[Depends(auth.require_scope("read"))])
 async def get_session_model():
     """Report the live run's model override and the effective default.
     `override` is the alias currently written to .loki/state/model-override
-    (None when no override is active). `default` is the session model the run
-    falls back to when there is no override (LOKI_SESSION_MODEL or the catalog
-    default). `effective` is the model the next iteration will actually use,
-    after the LOKI_MAX_TIER cost ceiling is applied (so the dashboard never
-    reports a model the run would clamp down).
-    The clamp resolves through the SAME provider config the runner uses
-    (LOKI_ALLOW_HAIKU plus the LOKI_CLAUDE_MODEL_FAST/DEVELOPMENT and
-    LOKI_MODEL_FAST/DEVELOPMENT overrides): _clamp_to_max_tier mirrors
-    providers/claude.sh loki_apply_max_tier_clamp byte-for-byte (locked by the
-    resolver parity matrix in tests/test-model-override.sh). So for the OVERRIDE
-    case -- the feature this endpoint exists for -- the reported `effective` model
-    equals the model the runner's mid-flight override path dispatches, given the
-    same environment.
+    (None when no override is active). `default` is the session pin alias the run
+    falls back to when there is no override (LOKI_SESSION_MODEL or "sonnet").
+    `effective` is the model the next iteration will actually DISPATCH, resolved
+    on the SAME route the runner uses for the active case, so the dashboard never
+    reports a model that differs from what the run runs:
+      - OVERRIDE active: the runner feeds the alias straight to --model via
+        loki_apply_max_tier_clamp(alias, alias). `effective` = _clamp_to_max_tier
+        (the override-path clamp). A "sonnet" override dispatches sonnet.
+      - NO override (session pin): the runner maps the pin through a tier
+        (opus->planning, sonnet->development, haiku->fast) and resolves the tier
+        through PROVIDER_MODEL_* (then the cost-ceiling clamp). `effective` =
+        _resolve_session_pin. A "sonnet" pin dispatches OPUS (development tier ->
+        PROVIDER_MODEL_DEVELOPMENT=opus on stock config).
+    Both routes resolve through the SAME provider config the runner uses
+    (LOKI_ALLOW_HAIKU plus the LOKI_CLAUDE_MODEL_PLANNING/FAST/DEVELOPMENT and
+    LOKI_MODEL_* overrides) and the SAME LOKI_MAX_TIER ceiling, mirroring
+    providers/claude.sh byte-for-byte. The agreement (estimator == dashboard ==
+    runner) on BOTH routes -- including the no-override stock path -- is locked by
+    the cross-route cases and the session-pin parity matrix in
+    tests/test-model-override.sh. (Before task 568 the no-override path applied the
+    override-path clamp to the pin, so a stock "sonnet" pin reported "sonnet" while
+    the run dispatched opus; that gap is now closed.)
     KNOWN LIMITATION (cross-process env divergence): the resolution reads
     LOKI_MAX_TIER, LOKI_ALLOW_HAIKU, LOKI_SESSION_MODEL and the model-override env
     vars from the DASHBOARD process's environment, which is usually a different
     process than the live run. So if the run was launched with a different
     environment than the dashboard, the no-override `default`/`effective` may not
-    reflect the run's real pinned tier or ceiling (e.g. a run pinned to opus still
-    reads "sonnet" here). The override case reads the run's own state file, so its
-    alias is always accurate and the clamp is exact whenever the dashboard shares
+    reflect the run's real pinned tier or ceiling (e.g. a run launched with
+    LOKI_SESSION_MODEL=opus while the dashboard's env has no pin still reads the
+    default here). The override case reads the run's own state file, so its alias
+    is always accurate and the resolution is exact whenever the dashboard shares
     the run's environment.
-    SCOPE NOTE (no-override default path): when there is no override, `effective`
-    applies the override-path clamp to the session default. The runner's
-    no-override route instead maps a session pin through a tier
-    (resolve_model_for_tier: opus->planning, sonnet->development), which can differ
-    from the override-path clamp in one cell (e.g. an opus pin under sonnet cap +
-    LOKI_ALLOW_HAIKU: the tier route yields sonnet, the override-path clamp yields
-    opus). That session-pin modeling gap is pre-existing and out of scope here;
-    the override case this endpoint serves is exact.
     """
     override = None
     try:
@@ -2343,8 +2435,16 @@ async def get_session_model():
             override = _normalize_session_model(p.read_text()) or None
     except OSError:
         override = None
-    default = _normalize_session_model(os.environ.get("LOKI_SESSION_MODEL")) or "sonnet"
-    effective = _clamp_to_max_tier(override or default)
+    # Session pin accepts tier names too (run.sh:12336), so use the broader
+    # session-pin normalizer here (NOT the narrow override allowlist).
+    default = _normalize_session_pin(os.environ.get("LOKI_SESSION_MODEL")) or "sonnet"
+    # Resolve on the route the runner will actually take: override-path clamp when
+    # an override file is present, session-pin tier route otherwise. This closes
+    # the task-568 stock-path gap (a "sonnet" pin dispatches opus).
+    if override is not None:
+        effective = _clamp_to_max_tier(override)
+    else:
+        effective = _resolve_session_pin(default)
     return {
         "override": override,
         "default": default,

package/docs/INSTALLATION.md CHANGED Viewed

@@ -2,7 +2,7 @@
 The flagship product of [Autonomi](https://www.autonomi.dev/). Loki Mode is a spec-driven autonomous builder with a built-in trust layer that takes any spec to a deployed product and verifies completion with evidence (quality gates plus a completion council), not just a "done" claim. Complete installation instructions for all platforms and use cases.
-**Version:** v7.31.0
+**Version:** v7.32.0
 ---