npm - loki-mode - Versions diffs - 7.30.0 → 7.31.0 - Mend

loki-mode 7.30.0 → 7.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/README.md +4 -2
package/SKILL.md +2 -2
package/VERSION +1 -1
package/autonomy/context-tracker.py +8 -0
package/autonomy/loki +462 -105
package/autonomy/mcp-launch.sh +124 -22
package/autonomy/run.sh +148 -1
package/bin/loki +19 -0
package/dashboard/__init__.py +1 -1
package/dashboard/server.py +226 -1
package/dashboard/static/index.html +105 -39
package/docs/INSTALLATION.md +1 -1
package/docs/competitive/replit-lovable-analysis.md +1 -1
package/loki-ts/data/model-pricing.json +1 -0
package/loki-ts/dist/loki.js +233 -232
package/mcp/__init__.py +1 -1
package/mcp/_sdk_loader.py +157 -0
package/mcp/lsp_proxy.py +61 -61
package/mcp/server.py +14 -129
package/package.json +1 -1
package/providers/claude.sh +76 -19
package/providers/model_catalog.json +9 -0
package/skills/model-selection.md +44 -0

package/dashboard/server.py CHANGED Viewed

@@ -2159,6 +2159,21 @@ class FocusRequest(BaseModel):
     project_dir: str
+# Mid-flight model switching: the allowlist of aliases a live run may switch to.
+# MUST stay identical to the read-side allowlist in run.sh (the override file is
+# fed straight into `claude --model`). Fable is the top-tier advisory model at
+# 2x Opus cost; the UI shows that. `None`/empty clears the override.
+_SESSION_MODEL_ALLOWLIST = ("haiku", "sonnet", "opus", "fable")
+class SessionModelRequest(BaseModel):
+    """Schema for setting (or clearing) the live run's model override."""
+    # Disable Pydantic's protected "model_" namespace so a field literally named
+    # "model" does not emit a warning.
+    model_config = ConfigDict(protected_namespaces=())
+    model: str | None = None
 @app.post("/api/focus", dependencies=[Depends(auth.require_scope("control"))])
 async def set_focus(request: FocusRequest):
     """Set the active project directory for .loki/ resolution.
@@ -2202,6 +2217,187 @@ async def clear_focus():
     return {"project_dir": None, "loki_dir": str(_get_loki_dir())}
+def _model_override_path() -> _Path:
+    """Project-scoped path to the mid-flight model override file."""
+    return _get_loki_dir() / "state" / "model-override"
+def _normalize_session_model(raw: str | None) -> str:
+    """Canonical model-alias normalization shared with run.sh + the estimator.
+    Trim, lowercase, and accept ONLY an exact allowlisted alias. A value with
+    interior whitespace (e.g. "fab le") normalizes to "" and is rejected, so the
+    dashboard, the runner, and the estimator agree on what a value means.
+    """
+    val = (raw or "").strip().lower()
+    return val if val in _SESSION_MODEL_ALLOWLIST else ""
+# Provider-config model resolution mirror.
+#
+# SYNC: This is a byte-faithful python port of the claude provider's tier->model
+# resolution in providers/claude.sh (CLAUDE_DEFAULT_FAST / CLAUDE_DEFAULT_DEVELOPMENT
+# and the PROVIDER_MODEL_FAST / PROVIDER_MODEL_DEVELOPMENT resolution chains,
+# claude.sh:55-67) plus loki_apply_max_tier_clamp (claude.sh:318). The same port
+# also lives in the `loki plan` estimator (autonomy/loki, _provider_model_fast /
+# _provider_model_development / _loki_clamp_alias). All three readers MUST agree;
+# the agreement is locked by the parity test in tests/test-model-override.sh
+# ("resolver parity matrix") and the cross-route tests in test-plan-command.sh.
+# If you change resolution here, change it in claude.sh AND autonomy/loki, and
+# re-run those tests. The `or` chains mirror bash `:-` empty-string-fallthrough;
+# allow_haiku uses an exact "true" match to mirror bash `[ "$x" = "true" ]`.
+def _allow_haiku() -> bool:
+    return (os.environ.get("LOKI_ALLOW_HAIKU", "false") or "false") == "true"
+def _provider_model_fast() -> str:
+    # claude.sh:67 -> LOKI_CLAUDE_MODEL_FAST > LOKI_MODEL_FAST > haiku-aware default.
+    return (
+        os.environ.get("LOKI_CLAUDE_MODEL_FAST")
+        or os.environ.get("LOKI_MODEL_FAST")
+        or ("haiku" if _allow_haiku() else "sonnet")
+    )
+def _provider_model_development() -> str:
+    # claude.sh:66 -> LOKI_CLAUDE_MODEL_DEVELOPMENT > LOKI_MODEL_DEVELOPMENT > default.
+    return (
+        os.environ.get("LOKI_CLAUDE_MODEL_DEVELOPMENT")
+        or os.environ.get("LOKI_MODEL_DEVELOPMENT")
+        or ("sonnet" if _allow_haiku() else "opus")
+    )
+def _clamp_to_max_tier(alias: str) -> str:
+    """Apply the operator LOKI_MAX_TIER ceiling to a model alias.
+    Mirrors providers/claude.sh loki_apply_max_tier_clamp EXACTLY (resolving the
+    clamp result through the SAME provider config the runner uses): a haiku cap
+    pins everything to PROVIDER_MODEL_FAST (sonnet by default, haiku when
+    LOKI_ALLOW_HAIKU=true), and a sonnet cap resolves fable down to
+    PROVIDER_MODEL_DEVELOPMENT (opus by default, sonnet when LOKI_ALLOW_HAIKU=true).
+    The LOKI_CLAUDE_MODEL_FAST/DEVELOPMENT and LOKI_MODEL_FAST/DEVELOPMENT env
+    overrides are honored too. So the dashboard's reported `effective` model agrees
+    byte-for-byte with the model the run will dispatch when a cost ceiling is set.
+    This is invoked with alias as both model and tier (the override-path
+    convention), matching the run.sh mid-flight override clamp.
+    """
+    max_tier = (os.environ.get("LOKI_MAX_TIER") or "").strip().lower()
+    if not max_tier:
+        return alias
+    if max_tier == "haiku":
+        return _provider_model_fast()
+    if max_tier == "sonnet":
+        # The runner's sonnet arm downgrades iff tier/model is planning or fable;
+        # called with alias as both, that reduces to "downgrade iff alias==fable".
+        return _provider_model_development() if alias == "fable" else alias
+    if max_tier == "opus":
+        return "opus" if alias == "fable" else alias
+    return alias
+@app.get("/api/session/model", dependencies=[Depends(auth.require_scope("read"))])
+async def get_session_model():
+    """Report the live run's model override and the effective default.
+    `override` is the alias currently written to .loki/state/model-override
+    (None when no override is active). `default` is the session model the run
+    falls back to when there is no override (LOKI_SESSION_MODEL or the catalog
+    default). `effective` is the model the next iteration will actually use,
+    after the LOKI_MAX_TIER cost ceiling is applied (so the dashboard never
+    reports a model the run would clamp down).
+    The clamp resolves through the SAME provider config the runner uses
+    (LOKI_ALLOW_HAIKU plus the LOKI_CLAUDE_MODEL_FAST/DEVELOPMENT and
+    LOKI_MODEL_FAST/DEVELOPMENT overrides): _clamp_to_max_tier mirrors
+    providers/claude.sh loki_apply_max_tier_clamp byte-for-byte (locked by the
+    resolver parity matrix in tests/test-model-override.sh). So for the OVERRIDE
+    case -- the feature this endpoint exists for -- the reported `effective` model
+    equals the model the runner's mid-flight override path dispatches, given the
+    same environment.
+    KNOWN LIMITATION (cross-process env divergence): the resolution reads
+    LOKI_MAX_TIER, LOKI_ALLOW_HAIKU, LOKI_SESSION_MODEL and the model-override env
+    vars from the DASHBOARD process's environment, which is usually a different
+    process than the live run. So if the run was launched with a different
+    environment than the dashboard, the no-override `default`/`effective` may not
+    reflect the run's real pinned tier or ceiling (e.g. a run pinned to opus still
+    reads "sonnet" here). The override case reads the run's own state file, so its
+    alias is always accurate and the clamp is exact whenever the dashboard shares
+    the run's environment.
+    SCOPE NOTE (no-override default path): when there is no override, `effective`
+    applies the override-path clamp to the session default. The runner's
+    no-override route instead maps a session pin through a tier
+    (resolve_model_for_tier: opus->planning, sonnet->development), which can differ
+    from the override-path clamp in one cell (e.g. an opus pin under sonnet cap +
+    LOKI_ALLOW_HAIKU: the tier route yields sonnet, the override-path clamp yields
+    opus). That session-pin modeling gap is pre-existing and out of scope here;
+    the override case this endpoint serves is exact.
+    """
+    override = None
+    try:
+        p = _model_override_path()
+        if p.is_file():
+            override = _normalize_session_model(p.read_text()) or None
+    except OSError:
+        override = None
+    default = _normalize_session_model(os.environ.get("LOKI_SESSION_MODEL")) or "sonnet"
+    effective = _clamp_to_max_tier(override or default)
+    return {
+        "override": override,
+        "default": default,
+        "effective": effective,
+        "allowed": list(_SESSION_MODEL_ALLOWLIST),
+    }
+@app.post("/api/session/model", dependencies=[Depends(auth.require_scope("control"))])
+async def set_session_model(request: SessionModelRequest):
+    """Set (or clear) the model a live Loki run uses, applied from the NEXT
+    iteration boundary.
+    The run reads .loki/state/model-override at the top of each iteration, so a
+    switch takes effect when the current iteration finishes and the next
+    `claude -p` is spawned (the model is fixed per invocation). The override
+    applies to the CURRENT run only: the runner clears a leftover override at the
+    start of a fresh run, so a switch does not persist into future runs. Body
+    {"model": null} or {"model": ""} clears the override and reverts to the tier
+    mapping. The value is allowlist-validated server-side because the file is fed
+    straight into `claude --model`; arbitrary strings are rejected.
+    The `effective` field reports the model the next iteration will actually use
+    after the LOKI_MAX_TIER cost ceiling is applied (e.g. a fable override under
+    a sonnet ceiling reports the clamped model), so the response never claims a
+    model the run would clamp down. `clamped` is True when the ceiling reduced
+    the requested model.
+    """
+    requested_raw = (request.model or "").strip().lower()
+    override_path = _model_override_path()
+    if requested_raw == "":
+        # Clear the override; revert to tier mapping.
+        try:
+            if override_path.exists():
+                override_path.unlink()
+        except OSError as exc:
+            raise HTTPException(status_code=500, detail=f"Could not clear override: {exc}")
+        return {"model": None, "effective": "next_iteration", "clamped": False}
+    model = _normalize_session_model(requested_raw)
+    if not model:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid model '{request.model}'. Allowed: {', '.join(_SESSION_MODEL_ALLOWLIST)}",
+        )
+    try:
+        override_path.parent.mkdir(parents=True, exist_ok=True)
+        override_path.write_text(model + "\n")
+    except OSError as exc:
+        raise HTTPException(status_code=500, detail=f"Could not write override: {exc}")
+    effective = _clamp_to_max_tier(model)
+    return {"model": model, "effective": effective, "clamped": effective != model}
 @app.get("/api/running-projects")
 async def list_running_projects():
     """List registered projects enriched with live status for the dashboard
@@ -4389,6 +4585,9 @@ async def stop_session(request: Request):
 # At runtime, overridden by .loki/pricing.json if available
 _DEFAULT_PRICING = {
     # Claude (Anthropic)
+    # Fable 5 is the top-tier advisory model at exactly 2x Opus per token.
+    "fable":  {"input": 10.00, "output": 50.00},
+    "claude-fable-5": {"input": 10.00, "output": 50.00},
     "opus":   {"input": 5.00, "output": 25.00},
     "sonnet": {"input": 3.00, "output": 15.00},
     "haiku":  {"input": 1.00, "output": 5.00},
@@ -7997,4 +8196,30 @@ def run_server(host: str = None, port: int = None) -> None:
 if __name__ == "__main__":
-    run_server()
+    # Honor an explicit --port/--host on a direct module launch
+    # (python -m dashboard.server --port N). The supported `loki dashboard start`
+    # path sets LOKI_DASHBOARD_PORT in the environment and passes NO argv flags,
+    # so it is unaffected. Previously --port was silently accepted and discarded,
+    # binding the default 57374 and risking a collision with another project's
+    # dashboard; now an unknown flag fails loudly via argparse (exit 2).
+    import argparse
+    parser = argparse.ArgumentParser(
+        prog="python -m dashboard.server",
+        description="Loki Mode dashboard server. The supported launcher is "
+        "'loki dashboard start' (which uses LOKI_DASHBOARD_PORT / "
+        "LOKI_DASHBOARD_HOST); these flags are for direct module launches.",
+    )
+    parser.add_argument(
+        "--port",
+        type=int,
+        default=None,
+        help="Port to bind (default: $LOKI_DASHBOARD_PORT or 57374).",
+    )
+    parser.add_argument(
+        "--host",
+        default=None,
+        help="Host to bind (default: $LOKI_DASHBOARD_HOST or 127.0.0.1).",
+    )
+    _args = parser.parse_args()
+    run_server(host=_args.host, port=_args.port)