PyPI - caudate-cli - Versions diffs - 0.1.0__py3-none-any.whl - Mend

caudate-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (153) hide show

api/__init__.py +5 -0
api/anthropic_compat.py +1518 -0
api/artifact_viewer.py +366 -0
api/caudate_middleware.py +618 -0
api/forge_bootstrapper_routes.py +377 -0
api/forge_routes.py +630 -0
api/forge_system_routes.py +294 -0
api/openai_compat.py +1993 -0
api/server.py +667 -0
api/storyboard_page.py +677 -0
caudate_cli-0.1.0.dist-info/METADATA +354 -0
caudate_cli-0.1.0.dist-info/RECORD +153 -0
caudate_cli-0.1.0.dist-info/WHEEL +5 -0
caudate_cli-0.1.0.dist-info/entry_points.txt +2 -0
caudate_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
caudate_cli-0.1.0.dist-info/top_level.txt +14 -0
cognos_mcp/__init__.py +4 -0
cognos_mcp/bridge.py +41 -0
cognos_mcp/client.py +70 -0
cognos_mcp/config.py +49 -0
cognos_mcp/server.py +66 -0
config.py +82 -0
core/__init__.py +0 -0
core/agent.py +468 -0
core/agentic_loop.py +731 -0
core/anthropic_auth.py +91 -0
core/background.py +113 -0
core/banner.py +134 -0
core/bootstrap.py +292 -0
core/citations.py +131 -0
core/compaction.py +109 -0
core/constitution.py +198 -0
core/diff_viewer.py +87 -0
core/export.py +85 -0
core/file_refs.py +119 -0
core/files.py +199 -0
core/hooks.py +209 -0
core/image.py +599 -0
core/input.py +91 -0
core/loop.py +238 -0
core/memory_md.py +147 -0
core/notifications.py +99 -0
core/ownership.py +181 -0
core/paste.py +81 -0
core/permissions.py +210 -0
core/plan_mode.py +215 -0
core/sandbox_prompt.py +185 -0
core/scheduler.py +195 -0
core/schemas.py +202 -0
core/session.py +90 -0
core/settings.py +132 -0
core/skills.py +398 -0
core/slash_commands.py +977 -0
core/statusline.py +61 -0
core/subagent.py +300 -0
core/thinking.py +50 -0
core/updater.py +122 -0
core/usage.py +109 -0
core/worktree.py +93 -0
execution/__init__.py +0 -0
execution/executor.py +329 -0
execution/plugins.py +108 -0
execution/tools/__init__.py +0 -0
execution/tools/agent_tool.py +107 -0
execution/tools/agentic_tool.py +297 -0
execution/tools/artifact_tool.py +191 -0
execution/tools/ask_user_question_tool.py +137 -0
execution/tools/base.py +81 -0
execution/tools/calculator_tool.py +137 -0
execution/tools/cognos_card_tool.py +124 -0
execution/tools/cron_tool.py +215 -0
execution/tools/datetime_tool.py +215 -0
execution/tools/describe_image_tool.py +161 -0
execution/tools/draw_tool.py +164 -0
execution/tools/edit_image_tool.py +262 -0
execution/tools/edit_tool.py +245 -0
execution/tools/file_tool.py +90 -0
execution/tools/find_anywhere_tool.py +255 -0
execution/tools/forge_feature_tools.py +377 -0
execution/tools/glob_tool.py +59 -0
execution/tools/grep_tool.py +89 -0
execution/tools/http_request_tool.py +224 -0
execution/tools/load_skill_tool.py +104 -0
execution/tools/longcat_avatar_tool.py +384 -0
execution/tools/mcp_tool.py +100 -0
execution/tools/notebook_tool.py +279 -0
execution/tools/openapi_tool.py +440 -0
execution/tools/plan_mode_tool.py +95 -0
execution/tools/push_notification_tool.py +157 -0
execution/tools/python_tool.py +61 -0
execution/tools/respond_tool.py +40 -0
execution/tools/sandbox_tool.py +378 -0
execution/tools/search_tool.py +153 -0
execution/tools/semantic_search_tool.py +106 -0
execution/tools/shell_tool.py +283 -0
execution/tools/speak_tool.py +134 -0
execution/tools/storyboard_tool.py +727 -0
execution/tools/system_info_tool.py +212 -0
execution/tools/task_tool.py +323 -0
execution/tools/think_tool.py +49 -0
execution/tools/transcribe_audio_tool.py +86 -0
execution/tools/update_memory_tool.py +92 -0
execution/tools/web_fetch_tool.py +82 -0
execution/tools/worktree_tool.py +174 -0
llm/__init__.py +0 -0
llm/fallback.py +116 -0
llm/models.py +320 -0
llm/provider.py +1356 -0
llm/router.py +373 -0
main.py +1889 -0
memory/__init__.py +0 -0
memory/episodic.py +99 -0
memory/procedural.py +145 -0
memory/semantic.py +71 -0
memory/working.py +64 -0
nn/__init__.py +43 -0
nn/auto_evolve.py +245 -0
nn/caudate.py +136 -0
nn/config.py +141 -0
nn/consolidator.py +81 -0
nn/data.py +1635 -0
nn/encoder.py +258 -0
nn/forge_advisor.py +303 -0
nn/format.py +235 -0
nn/heads.py +432 -0
nn/observer.py +994 -0
nn/policy.py +214 -0
nn/runtime.py +343 -0
nn/scorer.py +175 -0
nn/trainer.py +515 -0
nn/vision.py +352 -0
personality/__init__.py +23 -0
personality/engine.py +129 -0
personality/identity.py +144 -0
personality/inner_voice.py +100 -0
personality/mood.py +205 -0
planning/__init__.py +0 -0
planning/dev_server.py +221 -0
planning/forge_models.py +718 -0
planning/orchestrator.py +1363 -0
planning/planner.py +451 -0
planning/task_graph.py +61 -0
reflection/__init__.py +0 -0
reflection/meta_learner.py +156 -0
reflection/reflector.py +127 -0
ui/__init__.py +5 -0
ui/display.py +88 -0
voice/__init__.py +0 -0
voice/conversation.py +125 -0
voice/listener.py +111 -0
voice/speaker.py +59 -0
voice/stt.py +126 -0
voice/tts.py +214 -0

api/server.py ADDED Viewed

@@ -0,0 +1,667 @@
+"""FastAPI server — exposes Cognos as an HTTP service.
+Endpoints:
+  POST /chat                 — send a message, get a reply (optional session)
+  POST /chat/stream          — same, streamed as SSE
+  GET  /sessions             — list saved sessions
+  GET  /sessions/{id}        — fetch one session
+  DELETE /sessions/{id}      — delete a session
+  GET  /tools                — list registered tools with schemas
+  GET  /models               — list detected models with capability flags
+  GET  /healthz              — liveness probe
+Every `CognosAgent` is cached by session id so conversations are stateful
+across requests. The API is NOT multi-tenant — it assumes a trusted caller
+(no auth layer in this cut).
+"""
+from __future__ import annotations
+import asyncio
+import logging
+from contextlib import asynccontextmanager
+from typing import Any
+import tempfile
+from pathlib import Path
+from fastapi import FastAPI, File, HTTPException, UploadFile
+from fastapi.responses import (
+    FileResponse, HTMLResponse, RedirectResponse, StreamingResponse,
+)
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel, Field
+from core.agent import CognosAgent
+from core.citations import CitationBlock, Document
+from core.files import FileStore
+from core.session import SessionManager
+logger = logging.getLogger(__name__)
+# ---- Request / response schemas ----
+# Request body shared by POST /chat and POST /chat/stream.
+# NOTE: documented with `#` comments rather than a class docstring because
+# a docstring on a Pydantic model is published in the generated schema.
+class ChatRequest(BaseModel):
+    # User message; both chat routes answer HTTP 400 when it is empty.
+    message: str
+    # Session to resume. When it names a cached agent that agent is reused;
+    # otherwise a new agent is created and cached under `agent.session.id`.
+    session_id: str | None = None
+    # model / system1 / system2 / permission_mode are only forwarded to
+    # `CognosAgent(...)` when a new agent is created; they are not applied
+    # to an agent that is already cached for the session.
+    model: str | None = None
+    system1: str | None = None
+    system2: str | None = None
+    permission_mode: str | None = Field(
+        default=None,
+        description="default|plan|accept_edits|bypass",
+    )
+    # Passed to `agent.chat(...)` by POST /chat (empty list -> None).
+    # POST /chat/stream does not forward attachments or documents.
+    attachments: list[str] = Field(
+        default_factory=list,
+        description="File IDs previously uploaded via POST /files",
+    )
+    documents: list[Document] = Field(
+        default_factory=list,
+        description="Reference documents the model may cite",
+    )
+# Response body of POST /chat.
+class ChatResponse(BaseModel):
+    # Text returned by `agent.chat(...)`.
+    reply: str
+    # `agent.session.id` of the agent that handled the request.
+    session_id: str
+    # `agent.personality.mood.label()`, or None when the agent has no personality.
+    mood: str | None = None
+    # Number of messages with role "tool" in the agent's whole message list
+    # (a running total for the session, not a per-request count).
+    tool_calls: int = 0
+    # Copy of `agent.agentic.last_citations` taken after the reply.
+    citations: list[CitationBlock] = Field(default_factory=list)
+# Request body of POST /v1/fim (fill-in-the-middle completion).
+class FIMRequest(BaseModel):
+    # Text before the gap to fill.
+    prefix: str
+    # Text after the gap; defaults to empty.
+    suffix: str = ""
+    # Default model picked at call time from llm.provider.DEFAULT_FIM_MODEL.
+    model: str | None = None
+    # temperature / max_tokens / stop are forwarded unchanged to
+    # `llm.provider.fim_complete`; None is passed through as-is.
+    temperature: float | None = None
+    max_tokens: int | None = None
+    stop: list[str] | None = None
+# Response body of POST /v1/fim.
+class FIMResponse(BaseModel):
+    # Value returned by `fim_complete`.
+    completion: str
+    # Model id actually used: the request's `model`, else DEFAULT_FIM_MODEL.
+    model: str
+# ---- App factory ----
+def create_app() -> FastAPI:
+    """Build the FastAPI application with its agent cache.
+
+    All routes are registered as closures over the `agent_cache` and
+    `lock` created here, so each call returns an app with its own,
+    independent cache.
+
+    Returns:
+        The configured FastAPI instance.
+    """
+    # session id -> live agent, shared by the route closures.
+    agent_cache: dict[str, CognosAgent] = {}
+    # Guards agent lookup/creation in `_get_agent` and the storyboard route.
+    lock = asyncio.Lock()
+    @asynccontextmanager
+    async def lifespan(app: FastAPI):
+        logger.info("Cognos API starting")
+        # Reclaim any forge sessions that were in_progress at the time
+        # the previous server died — flip them to terminated and bounce
+        # the feature back to backlog. Idempotent; safe even when no
+        # forge tables exist yet.
+        try:
+            from planning.orchestrator import reconcile_orphaned_sessions
+            n = reconcile_orphaned_sessions()
+            if n:
+                logger.info(f"forge: reconciled {n} orphaned session(s)")
+        except Exception as e:
+            logger.debug(f"forge reconcile skipped: {e}")
+        try:
+            yield
+        finally:
+            logger.info("Cognos API shutting down — stopping agents")
+            for a in agent_cache.values():
+                try:
+                    await a.stop()
+                except Exception as e:
+                    logger.debug(f"agent.stop failed: {e}")
+            # Reap dev servers spawned by /forge/projects/{id}/dev-server
+            try:
+                from planning import dev_server as _ds
+                n = _ds.stop_all()
+                if n:
+                    logger.info(f"stopped {n} forge dev-server(s)")
+            except Exception as e:
+                logger.debug(f"dev_server.stop_all failed: {e}")
+    app = FastAPI(title="Cognos", version="1.0", lifespan=lifespan)
+    async def _get_agent(req: ChatRequest) -> CognosAgent:
+        """Fetch or create an agent bound to the given session."""
+        async with lock:
+            session_id = req.session_id
+            if session_id and session_id in agent_cache:
+                return agent_cache[session_id]
+            agent = CognosAgent(
+                model=req.model,
+                mode="agentic",
+                permission_mode=req.permission_mode,
+                session_id=session_id,
+                system1=req.system1,
+                system2=req.system2,
+            )
+            agent_cache[agent.session.id] = agent
+            return agent
+    # ------------------------------------------------------------------
+    # Routes
+    # ------------------------------------------------------------------
+    @app.get("/healthz")
+    async def healthz() -> dict[str, str]:
+        return {"status": "ok"}
+    @app.post("/v1/fim", response_model=FIMResponse)
+    async def fim(req: FIMRequest) -> FIMResponse:
+        """Fill-in-the-middle code completion.
+        Side-channel for editor autocomplete — does NOT touch the
+        System 1 / System 2 chat router. Default model is the local
+        qwen2.5-coder for low-latency single-line/short-block fills;
+        pass a heavier model id for multi-line gaps.
+        """
+        from llm.provider import fim_complete as _fim, DEFAULT_FIM_MODEL
+        model = req.model or DEFAULT_FIM_MODEL
+        try:
+            completion = await _fim(
+                prefix=req.prefix,
+                suffix=req.suffix,
+                model=model,
+                temperature=req.temperature,
+                max_tokens=req.max_tokens,
+                stop=req.stop,
+            )
+        except ValueError as e:
+            raise HTTPException(status_code=400, detail=str(e))
+        except Exception as e:
+            raise HTTPException(status_code=502, detail=f"FIM call failed: {e}")
+        return FIMResponse(completion=completion, model=model)
+    @app.post("/chat", response_model=ChatResponse)
+    async def chat(req: ChatRequest) -> ChatResponse:
+        if not req.message:
+            raise HTTPException(400, "message is required")
+        agent = await _get_agent(req)
+        # Web-UI calls are allowed to use the Claude Code subscription
+        # OAuth token (read live from ~/.claude/.credentials.json) when
+        # the configured model is anthropic/*. Any non-web caller of
+        # LLMProvider sees no change in behaviour.
+        from core.anthropic_auth import subscription_auth_scope
+        with subscription_auth_scope():
+            reply = await agent.chat(
+                req.message,
+                attachments=req.attachments or None,
+                documents=req.documents or None,
+            )
+        return ChatResponse(
+            reply=reply,
+            session_id=agent.session.id,
+            mood=agent.personality.mood.label() if agent.personality else None,
+            tool_calls=sum(1 for m in agent.agentic.messages if m.get("role") == "tool"),
+            citations=list(agent.agentic.last_citations),
+        )
+    @app.post("/chat/stream")
+    async def chat_stream(req: ChatRequest) -> StreamingResponse:
+        if not req.message:
+            raise HTTPException(400, "message is required")
+        agent = await _get_agent(req)
+        from core.anthropic_auth import subscription_auth_scope
+        async def _sse():
+            import json as _json
+            with subscription_auth_scope():
+                async for event in agent.agentic.run_streaming(req.message):
+                    payload = _json.dumps(event.model_dump(mode="json"))
+                    yield f"event: {event.type}\ndata: {payload}\n\n"
+            yield f"event: done\ndata: {{\"session_id\": \"{agent.session.id}\"}}\n\n"
+        return StreamingResponse(_sse(), media_type="text/event-stream")
+    @app.get("/sessions")
+    async def list_sessions() -> list[dict[str, Any]]:
+        from config import SESSIONS_DIR
+        sm = SessionManager(SESSIONS_DIR)
+        return [
+            {
+                "id": s.id, "title": s.title, "model": s.model,
+                "messages": len(s.messages),
+                "updated_at": s.updated_at.isoformat(timespec="seconds"),
+            }
+            for s in sm.list()
+        ]
+    @app.get("/sessions/{session_id}")
+    async def get_session(session_id: str) -> dict[str, Any]:
+        from config import SESSIONS_DIR
+        sm = SessionManager(SESSIONS_DIR)
+        s = sm.load(session_id)
+        if s is None:
+            raise HTTPException(404, "session not found")
+        return s.model_dump(mode="json")
+    @app.delete("/sessions/{session_id}")
+    async def delete_session(session_id: str) -> dict[str, bool]:
+        from config import SESSIONS_DIR
+        sm = SessionManager(SESSIONS_DIR)
+        deleted = sm.delete(session_id)
+        if session_id in agent_cache:
+            try:
+                await agent_cache[session_id].stop()
+            except Exception:
+                pass
+            del agent_cache[session_id]
+        if not deleted:
+            raise HTTPException(404, "session not found")
+        return {"deleted": True}
+    @app.post("/files")
+    async def upload_file(file: UploadFile = File(...)) -> dict[str, Any]:
+        """Upload a file into the local store; returns the new file_id."""
+        from config import FILES_DIR
+        store = FileStore(root=FILES_DIR)
+        with tempfile.NamedTemporaryFile(delete=False, suffix=Path(file.filename or "upload").suffix) as tmp:
+            tmp.write(await file.read())
+            tmp_path = Path(tmp.name)
+        try:
+            record = store.upload(tmp_path, filename=file.filename or tmp_path.name)
+        finally:
+            tmp_path.unlink(missing_ok=True)
+        return record.model_dump(mode="json")
+    @app.get("/files")
+    async def list_files() -> list[dict[str, Any]]:
+        from config import FILES_DIR
+        store = FileStore(root=FILES_DIR)
+        return [r.model_dump(mode="json") for r in store.list()]
+    @app.delete("/files/{file_id}")
+    async def delete_file(file_id: str) -> dict[str, bool]:
+        from config import FILES_DIR
+        store = FileStore(root=FILES_DIR)
+        if not store.delete(file_id):
+            raise HTTPException(404, "file not found")
+        return {"deleted": True}
+    @app.get("/files/{file_id}/content")
+    async def get_file_content(file_id: str):
+        """Stream a stored file's bytes — used by the UI for inline image rendering."""
+        from config import FILES_DIR
+        store = FileStore(root=FILES_DIR)
+        rec = store.get(file_id)
+        if rec is None:
+            raise HTTPException(404, "file not found")
+        # Render-in-browser types must serve as `inline`; otherwise
+        # iframes / <img> / <audio> see Content-Disposition:attachment
+        # and abort the load (was breaking the artifact viewer's
+        # iframe for HTML files).
+        mime = rec.mime_type or "application/octet-stream"
+        inline_kinds = (
+            mime.startswith("image/") or
+            mime.startswith("audio/") or
+            mime.startswith("video/") or
+            mime.startswith("text/") or
+            mime in ("application/pdf", "application/json",
+                     "image/svg+xml")
+        )
+        disposition = "inline" if inline_kinds else "attachment"
+        return FileResponse(
+            rec.path,
+            media_type=mime,
+            filename=rec.filename,
+            content_disposition_type=disposition,
+        )
+    @app.get("/artifact/{file_id}", response_class=HTMLResponse)
+    async def get_artifact_viewer(file_id: str):
+        """Polished single-page viewer for a FileStore artifact —
+        Phase 1 of COGNOS_UI_ROADMAP.md. Renders HTML/SVG/code/
+        markdown/image/audio with type-appropriate UI; serves a
+        download fallback for unknown binaries."""
+        from config import FILES_DIR
+        from api.artifact_viewer import render_artifact_page
+        store = FileStore(root=FILES_DIR)
+        rec = store.get(file_id)
+        if rec is None:
+            raise HTTPException(404, "file not found")
+        try:
+            content_bytes = Path(rec.path).read_bytes()
+        except OSError as e:
+            raise HTTPException(500, f"failed to read artifact: {e}")
+        page = render_artifact_page(
+            file_id=file_id,
+            filename=rec.filename,
+            mime_type=rec.mime_type or "",
+            size_bytes=rec.size_bytes,
+            content_bytes=content_bytes,
+        )
+        return HTMLResponse(content=page)
+    # ------------------------------------------------------------------
+    # Storyboard — dedicated SPA + SSE generation endpoint
+    # ------------------------------------------------------------------
+    # ------------------------------------------------------------------
+    # Storyboard React app (Approach C) — Vite-built SPA at
+    # /storyboard-app/. Falls back to index.html for client-side routes
+    # like /storyboard-app/storyboard, /storyboard-app/help, etc.
+    # ------------------------------------------------------------------
+    # <parent of this file's directory>/ui/storyboard-app/dist. The check
+    # below runs once, when the app is built: if the directory is missing
+    # at that moment none of the /storyboard-app routes are registered.
+    _STORYBOARD_DIST = Path(__file__).resolve().parent.parent / "ui" / "storyboard-app" / "dist"
+    if _STORYBOARD_DIST.is_dir():
+        # Static asset directory (JS, CSS, images bundled by Vite).
+        app.mount(
+            "/storyboard-app/assets",
+            StaticFiles(directory=str(_STORYBOARD_DIST / "assets")),
+            name="storyboard-app-assets",
+        )
+        # Vite copies anything under public/ to the dist root. We need
+        # an explicit route for the favicon (and any future top-level
+        # static files) so the catch-all route below doesn't shadow
+        # them by returning index.html.
+        @app.get("/storyboard-app/favicon.png")
+        async def storyboard_app_favicon() -> FileResponse:
+            return FileResponse(
+                str(_STORYBOARD_DIST / "favicon.png"),
+                media_type="image/png",
+            )
+        # Three decorators: the bare path, the trailing-slash path, and a
+        # catch-all for any deeper client-side route. `client_route` is
+        # accepted only so the path parameter binds; it is never read.
+        @app.get("/storyboard-app", response_class=HTMLResponse)
+        @app.get("/storyboard-app/", response_class=HTMLResponse)
+        @app.get("/storyboard-app/{client_route:path}",
+                 response_class=HTMLResponse)
+        async def storyboard_app(client_route: str = "") -> HTMLResponse:
+            """Serve the React SPA for any client-side route. The
+            React Router (`basename=/storyboard-app`) takes over once
+            the JS bundle loads."""
+            # Real assets under /storyboard-app/assets are handled by
+            # the StaticFiles mount above and never reach this route.
+            # Anything else returns the bundled index.html so React
+            # Router can resolve the in-app path.
+            # index.html is re-read from disk on every request (no caching).
+            return HTMLResponse(
+                content=(_STORYBOARD_DIST / "index.html").read_text(
+                    encoding="utf-8",
+                ),
+            )
+    else:
+        # Not an error: the SPA is optional, so only an info line is logged.
+        logger.info(
+            "storyboard-app dist not built; "
+            "run `cd ui/storyboard-app && npm run build` to enable /storyboard-app"
+        )
+    @app.get("/storyboard", response_class=HTMLResponse)
+    async def storyboard_page() -> HTMLResponse:
+        """Single-page HTML storyboard generator (Approach B).
+        Talks to `POST /storyboard/generate` over Server-Sent Events
+        for live progress as each panel renders.
+        """
+        from api.storyboard_page import render_storyboard_page
+        return HTMLResponse(content=render_storyboard_page())
+    @app.post("/storyboard/generate")
+    async def storyboard_generate(req: dict) -> StreamingResponse:
+        """SSE: stream storyboard breakdown + per-panel completion.
+        Request body (JSON):
+          { "story": str, "panels": int, "style": str,
+            "size": "1024x1024" | "1280x720" | "768x1024",
+            "seed": int | null }
+        Stream events (each as `data: {json}\\n\\n`):
+          {type: "breakdown", plan: {...}}
+          {type: "panel", index, file_id, scene_action, panel_text,
+            character_names_in_scene}
+          {type: "done", panel_count, panel_file_ids}
+          {type: "error", message}
+        """
+        import json as _json
+        # Reuse a long-lived storyboard session so the FLUX-schnell +
+        # Kontext pipelines stay loaded across requests.
+        session_id = "_storyboard_app"
+        async with lock:
+            agent = agent_cache.get(session_id)
+            if agent is None:
+                agent = CognosAgent(
+                    mode="agentic", session_id=session_id,
+                )
+                agent_cache[session_id] = agent
+        tool = agent.loop.executor.get_tool("Storyboard")
+        if tool is None or not hasattr(tool, "execute_streaming"):
+            raise HTTPException(500, "Storyboard tool not registered")
+        story = (req.get("story") or "").strip()
+        if not story:
+            raise HTTPException(400, "`story` is required")
+        kwargs: dict[str, Any] = {
+            "story": story,
+            "panels": int(req.get("panels") or 6),
+            "style": (req.get("style") or "").strip(),
+            "size": req.get("size") or "1024x1024",
+        }
+        if req.get("seed") is not None:
+            try:
+                kwargs["seed"] = int(req["seed"])
+            except Exception:
+                pass
+        async def _event_stream():
+            try:
+                async for ev in tool.execute_streaming(**kwargs):
+                    yield f"data: {_json.dumps(ev)}\n\n".encode()
+            except Exception as e:
+                logger.exception("storyboard SSE failed")
+                err = _json.dumps({"type": "error", "message": str(e)})
+                yield f"data: {err}\n\n".encode()
+        return StreamingResponse(
+            _event_stream(),
+            media_type="text/event-stream",
+            headers={
+                "Cache-Control": "no-cache",
+                "X-Accel-Buffering": "no",  # disable nginx buffering
+            },
+        )
+    @app.get("/tools")
+    async def list_tools() -> list[dict[str, Any]]:
+        from execution.executor import Executor
+        ex = Executor()
+        return [
+            {"name": t.name, "description": t.description, "input_schema": t.input_schema}
+            for t in (ex.get_tool(n) for n in ex.list_tools())
+            if t is not None
+        ]
+    @app.get("/usage")
+    async def get_usage() -> dict[str, Any]:
+        """Process-wide token + cost report."""
+        from core.usage import get_global_tracker
+        return get_global_tracker().report()
+    @app.post("/usage/reset")
+    async def reset_usage() -> dict[str, bool]:
+        from core.usage import reset_global_tracker
+        reset_global_tracker()
+        return {"reset": True}
+    @app.get("/personality")
+    async def get_personality() -> dict[str, Any]:
+        """Current identity + mood snapshot."""
+        from config import DATA_DIR
+        from personality import PersonalityEngine
+        engine = PersonalityEngine.load(DATA_DIR)
+        return {
+            "identity": {
+                "describe": engine.identity.describe(),
+                "fields": {k: getattr(engine.identity, k) for k in engine.identity.model_fields},
+            },
+            "mood": {
+                "label": engine.mood.label(),
+                "fragment": engine.mood.system_prompt_fragment(),
+                "fields": {k: getattr(engine.mood, k) for k in engine.mood.model_fields},
+            },
+        }
+    @app.post("/sessions/{session_id}/compact")
+    async def compact_session(session_id: str) -> dict[str, Any]:
+        """Force-compact the message history of a saved session."""
+        from config import (
+            COMPACT_KEEP_RECENT, COMPACT_THRESHOLD, CONTEXT_WINDOW_SIZE,
+            SESSIONS_DIR,
+        )
+        from core.compaction import ContextCompactor
+        from llm.provider import LLMProvider
+        sm = SessionManager(SESSIONS_DIR)
+        s = sm.load(session_id)
+        if s is None:
+            raise HTTPException(404, "session not found")
+        compactor = ContextCompactor(
+            llm=LLMProvider(model=s.model or None),
+            context_window=CONTEXT_WINDOW_SIZE,
+            compact_threshold=COMPACT_THRESHOLD,
+            keep_recent=COMPACT_KEEP_RECENT,
+        )
+        before = len(s.messages)
+        s.messages = await compactor.compact(s.messages)
+        sm.save(s)
+        if session_id in agent_cache:
+            agent_cache[session_id].agentic.messages = list(s.messages)
+        return {"before": before, "after": len(s.messages)}
+    @app.post("/sessions/{session_id}/reset")
+    async def reset_session(session_id: str) -> dict[str, bool]:
+        """Clear the message history of a session in place."""
+        from config import SESSIONS_DIR
+        sm = SessionManager(SESSIONS_DIR)
+        s = sm.load(session_id)
+        if s is None:
+            raise HTTPException(404, "session not found")
+        s.messages = []
+        sm.save(s)
+        if session_id in agent_cache:
+            agent_cache[session_id].agentic.messages = []
+        return {"reset": True}
+    @app.get("/sessions/{session_id}/export")
+    async def export_session_endpoint(session_id: str, format: str = "markdown") -> Any:
+        """Render a saved session as markdown / json / html."""
+        from config import SESSIONS_DIR
+        from core.export import to_html, to_json, to_markdown
+        sm = SessionManager(SESSIONS_DIR)
+        s = sm.load(session_id)
+        if s is None:
+            raise HTTPException(404, "session not found")
+        fmt = format.lower()
+        if fmt in ("markdown", "md"):
+            from fastapi.responses import PlainTextResponse
+            return PlainTextResponse(to_markdown(s))
+        if fmt == "json":
+            return s.model_dump(mode="json")
+        if fmt == "html":
+            from fastapi.responses import HTMLResponse
+            return HTMLResponse(to_html(s))
+        raise HTTPException(400, f"Unknown format: {format}")
+    @app.post("/audit")
+    async def audit_tail(lines: int = 50) -> list[dict[str, Any]]:
+        """Tail the permission audit log."""
+        import json as _json
+        from config import AUDIT_LOG_PATH
+        if not AUDIT_LOG_PATH.exists():
+            return []
+        raw = AUDIT_LOG_PATH.read_text().splitlines()[-lines:]
+        out: list[dict[str, Any]] = []
+        for line in raw:
+            try:
+                out.append(_json.loads(line))
+            except Exception:
+                continue
+        return out
+    @app.post("/notify")
+    async def notify_endpoint(payload: dict[str, Any]) -> dict[str, bool]:
+        """Send a desktop notification on the host."""
+        from core.notifications import notify
+        title = payload.get("title", "Cognos")
+        body = payload.get("body", "")
+        ok = notify(title, body)
+        return {"sent": ok}
+    @app.get("/caudate")
+    async def caudate_status() -> dict[str, Any]:
+        """Inspect Caudate's current state. Pulls from any cached agent first;
+        falls back to a fresh observer (so the UI works even before any
+        chat has happened)."""
+        from nn.observer import CaudateObserver
+        if agent_cache:
+            cau = getattr(next(iter(agent_cache.values())), "caudate", None)
+            if cau is not None:
+                return cau.status()
+        return CaudateObserver(auto_train=False).status()
+    @app.post("/caudate/train")
+    async def caudate_train_endpoint() -> dict[str, Any]:
+        """Trigger a synchronous training burst on the active observer."""
+        if not agent_cache:
+            raise HTTPException(503, "no active agent")
+        cau = getattr(next(iter(agent_cache.values())), "caudate", None)
+        if cau is None:
+            raise HTTPException(503, "Caudate not initialized")
+        await asyncio.to_thread(cau._train_sync)
+        cau.reload_advisor()
+        return cau.status()
+    # Web UI — mounted at /ui, with `/` redirecting there for convenience.
+    # Both the mount and the redirect are skipped when
+    # <parent of this file's directory>/ui/web does not exist at build time.
+    web_dir = Path(__file__).resolve().parent.parent / "ui" / "web"
+    if web_dir.exists():
+        app.mount("/ui", StaticFiles(directory=str(web_dir), html=True), name="ui")
+        @app.get("/", include_in_schema=False)
+        async def root_redirect():
+            return RedirectResponse(url="/ui")
+    # The router modules below are imported here, inside create_app, so
+    # they are loaded when the app is built rather than at module import.
+    # Forge — autonomous coding harness REST API + SSE event streams.
+    # State in data/cognos.db (planning/forge_models.py); execution in
+    # planning/orchestrator.py; UI lives at /ui/forge inside the SPA.
+    from api.forge_routes import build_router as _build_forge_router
+    app.include_router(_build_forge_router())
+    from api.forge_system_routes import build_router as _build_forge_system_router
+    app.include_router(_build_forge_system_router())
+    from api.forge_bootstrapper_routes import build_router as _build_forge_boot_router
+    app.include_router(_build_forge_boot_router())
+    # Anthropic-compatible /v1/messages — lets Claude Code (or any
+    # Anthropic-format client) treat Cognos as its backend.
+    from api.anthropic_compat import build_router as _build_anthropic_router
+    app.include_router(_build_anthropic_router(), tags=["anthropic-compat"])
+    # OpenAI-compatible /v1/chat/completions — lets Open WebUI (or any
+    # OpenAI-format client) treat Cognos as its backend. Same Caudate
+    # observer + subscription auth scope as the web UI's `/chat`.
+    from api.openai_compat import build_router as _build_openai_router
+    app.include_router(_build_openai_router(), tags=["openai-compat"])
+    @app.get("/models")
+    async def list_models() -> list[dict[str, Any]]:
+        from llm.models import ModelRegistry
+        reg = ModelRegistry()
+        await reg.refresh()
+        return [
+            {
+                "id": m.id, "name": m.name, "provider": m.provider,
+                "supports_tool_calling": m.supports_tool_calling,
+                "supports_json_mode": m.supports_json_mode,
+                "context_window": m.context_window,
+                "size_bytes": m.size_bytes,
+            }
+            for m in reg.models()
+        ]
+    return app