npm - @geravant/sinain - Versions diffs - 1.13.0 → 1.15.0 - Mend

@geravant/sinain 1.13.0 → 1.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78) hide show

package/.env.example +33 -27
package/cli.js +30 -14
package/config-shared.js +173 -30
package/launcher.js +38 -21
package/onboard.js +36 -20
package/package.json +4 -1
package/sinain-agent/run.sh +600 -127
package/sinain-core/src/agents-loader.ts +254 -0
package/sinain-core/src/buffers/feed-buffer.ts +6 -4
package/sinain-core/src/config.ts +77 -15
package/sinain-core/src/escalation/escalator.ts +178 -18
package/sinain-core/src/index.ts +218 -31
package/sinain-core/src/learning/local-curation.ts +81 -27
package/sinain-core/src/overlay/commands.ts +25 -0
package/sinain-core/src/overlay/ws-handler.ts +3 -0
package/sinain-core/src/server.ts +101 -10
package/sinain-core/src/types.ts +29 -3
package/sinain-memory/graph_query.py +12 -3
package/sinain-memory/knowledge_integrator.py +194 -10
package/sinain-memory/__pycache__/common.cpython-312.pyc +0 -0
package/sinain-memory/__pycache__/embed_client.cpython-312.pyc +0 -0
package/sinain-memory/__pycache__/graph_query.cpython-312.pyc +0 -0
package/sinain-memory/__pycache__/knowledge_integrator.cpython-312.pyc +0 -0
package/sinain-memory/__pycache__/session_distiller.cpython-312.pyc +0 -0
package/sinain-memory/__pycache__/triplestore.cpython-312.pyc +0 -0
package/sinain-memory/eval/__init__.py +0 -0
package/sinain-memory/eval/__pycache__/__init__.cpython-312.pyc +0 -0
package/sinain-memory/eval/assertions.py +0 -267
package/sinain-memory/eval/benchmarks/__init__.py +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/__init__.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/base_adapter.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/config.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/evaluate.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/ingest.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/longmemeval_adapter.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/meeting_adapter.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/meeting_runner.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/query.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/report.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/__pycache__/runner.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/base_adapter.py +0 -43
package/sinain-memory/eval/benchmarks/config.py +0 -23
package/sinain-memory/eval/benchmarks/evaluate.py +0 -146
package/sinain-memory/eval/benchmarks/ingest.py +0 -152
package/sinain-memory/eval/benchmarks/judges/__init__.py +0 -0
package/sinain-memory/eval/benchmarks/judges/__pycache__/__init__.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/judges/__pycache__/qa_judge.cpython-312.pyc +0 -0
package/sinain-memory/eval/benchmarks/judges/qa_judge.py +0 -81
package/sinain-memory/eval/benchmarks/longmemeval_adapter.py +0 -177
package/sinain-memory/eval/benchmarks/meeting_adapter.py +0 -81
package/sinain-memory/eval/benchmarks/meeting_runner.py +0 -230
package/sinain-memory/eval/benchmarks/query.py +0 -193
package/sinain-memory/eval/benchmarks/report.py +0 -87
package/sinain-memory/eval/benchmarks/run_meeting_bench.sh +0 -318
package/sinain-memory/eval/benchmarks/runner.py +0 -283
package/sinain-memory/eval/judges/__init__.py +0 -0
package/sinain-memory/eval/judges/base_judge.py +0 -61
package/sinain-memory/eval/judges/curation_judge.py +0 -46
package/sinain-memory/eval/judges/insight_judge.py +0 -48
package/sinain-memory/eval/judges/mining_judge.py +0 -42
package/sinain-memory/eval/judges/signal_judge.py +0 -45
package/sinain-memory/eval/retrieval_benchmark.jsonl +0 -12
package/sinain-memory/eval/retrieval_evaluator.py +0 -186
package/sinain-memory/eval/schemas.py +0 -247
package/sinain-memory/tests/__init__.py +0 -0
package/sinain-memory/tests/conftest.py +0 -189
package/sinain-memory/tests/test_curator_helpers.py +0 -94
package/sinain-memory/tests/test_embedder.py +0 -210
package/sinain-memory/tests/test_extract_json.py +0 -124
package/sinain-memory/tests/test_feedback_computation.py +0 -121
package/sinain-memory/tests/test_miner_helpers.py +0 -71
package/sinain-memory/tests/test_module_management.py +0 -458
package/sinain-memory/tests/test_parsers.py +0 -96
package/sinain-memory/tests/test_tick_evaluator.py +0 -430
package/sinain-memory/tests/test_triple_extractor.py +0 -255
package/sinain-memory/tests/test_triple_ingest.py +0 -191
package/sinain-memory/tests/test_triple_migrate.py +0 -138
package/sinain-memory/tests/test_triplestore.py +0 -248

package/sinain-core/src/escalation/escalator.ts CHANGED Viewed

@@ -33,6 +33,23 @@ export interface EscalatorDeps {
   feedbackStore?: FeedbackStore;
   signalCollector?: SignalCollector;
   queryKnowledgeFacts?: (entities: string[], maxFacts: number) => Promise<string>;
+  /** Returns the currently-selected spawn-lane agent from the bare-agent
+   *  roster ("" = Off). When a local agent is selected, dispatchSpawnTask
+   *  prefers the HTTP bare-agent path over the OpenClaw gateway WS path,
+   *  so the overlay's agent-selector choice is respected even when the
+   *  gateway is connected. */
+  getSpawnAgent?: () => string;
+  /** Returns the currently-selected escalation-lane agent. Gateway-typed
+   *  profiles (any agent whose `type` is "openclaw" — see isGatewayAgent)
+   *  route via WS; any other non-empty value routes to the local bare
+   *  agent via HTTP httpPending. */
+  getEscalationAgent?: () => string;
+  /** Returns true if the named profile is a gateway-style profile
+   *  (i.e. dispatched via WS RPC, not invoked as a local CLI). Lookup is
+   *  by `agentsCfg.profiles[name].type === "openclaw"`. Custom profiles
+   *  like "nemoclaw" or "nanoclaw-prod" with that type get WS dispatch
+   *  automatically — the routing key is type, not name. */
+  isGatewayAgent?: (name: string) => boolean;
 }
 /**
@@ -52,6 +69,14 @@ export class Escalator {
   private slot: EscalationSlot;
   private httpPending: HttpPendingEscalation | null = null;
+  // Grace window for stale escalation IDs — when analyzer rotates the pending
+  // slot mid-response (agent takes 10-30s on MCP flow while ticks fire every
+  // 3-6s), the agent's respondHttp(oldId) would fail. Keep last 5 IDs for ~60s
+  // so those responses still land on HUD instead of being silently dropped.
+  private recentHttpIds: Array<{ id: string; ts: number }> = [];
+  private static readonly STALE_ID_GRACE_MS = 60_000;
+  private static readonly STALE_ID_BUFFER_SIZE = 5;
   private lastEscalationTs = 0;
   private lastEscalatedDigest = "";
@@ -139,9 +164,17 @@ export class Escalator {
     log(TAG, `user command set: "${preview}"`);
   }
-  /** Start the WS connection to OpenClaw (skipped when transport=http). */
+  /** Start the WS connection to OpenClaw.
+   *
+   * Connects whenever the gateway URL is configured AND escalation isn't
+   * fully off. WS is the transport for the openclaw lane — the user selects
+   * it via the overlay's agent picker, and dispatch routes accordingly.
+   * Removing the openclaw profile from agents.json (and unsetting the env
+   * vars) leaves gatewayWsUrl empty → no connect attempt.
+   */
   start(): void {
-    if (this.deps.escalationConfig.mode !== "off" && this.deps.escalationConfig.transport !== "http") {
+    const wsConfigured = !!this.deps.openclawConfig.gatewayWsUrl;
+    if (this.deps.escalationConfig.mode !== "off" && wsConfigured) {
       this.wsClient.connect();
       const tokenHash = this.deps.openclawConfig.gatewayToken
         ? createHash("sha256").update(this.deps.openclawConfig.gatewayToken).digest("hex").slice(0, 12)
@@ -185,11 +218,16 @@ export class Escalator {
       this.pendingUserCommand = null;
     }
-    // Skip WS escalations when circuit is open (HTTP transport bypasses this)
-    const transport = this.deps.escalationConfig.transport;
-    if (this.wsClient.isCircuitOpen && transport !== "http") {
-      log(TAG, `tick #${entry.id}: skipped — circuit breaker open`);
-      return;
+    // Early skip when circuit is open AND the user has selected openclaw —
+    // saves the cost of building the escalation message just to drop it.
+    // Local-agent lanes (claude, openclaude, etc.) bypass this since they
+    // route via HTTP and don't depend on WS.
+    if (this.wsClient.isCircuitOpen) {
+      const escalationAgent = this.deps.getEscalationAgent?.() || "";
+      if (this.deps.isGatewayAgent?.(escalationAgent)) {
+        log(TAG, `tick #${entry.id}: skipped — circuit breaker open and gateway agent "${escalationAgent}" selected`);
+        return;
+      }
     }
     // If user command is pending, force escalation (bypass score + cooldown)
@@ -275,9 +313,48 @@ export class Escalator {
       ts: entry.ts,
     };
-    const useHttp = transport === "http" || (transport === "auto" && !this.wsClient.isConnected);
+    // Per-lane dispatch: agent identity *is* the transport.
+    //   - profile.type === "openclaw" (gateway-style) → WS dispatch
+    //   - any other non-empty agent (local CLI: claude, openclaude, ...) → HTTP
+    //   - empty (Off) → escalator.setMode("off") should have stopped us
+    //     upstream; defensive bailout.
+    //
+    // Routing keys off the profile's `type` field, not its name, so custom
+    // gateway profiles like "nemoclaw" or "nanoclaw-prod" route via WS
+    // automatically as long as they declare `type: "openclaw"` in agents.json.
+    //
+    // openclaw + WS-disconnected drops with a toast (no HTTP fallback),
+    // because the bare agent can't run a gateway profile as a local CLI —
+    // the fallback caused infinite skip loops historically.
+    const escalationAgent = this.deps.getEscalationAgent?.() || "";
+    const isGateway = this.deps.isGatewayAgent?.(escalationAgent) ?? false;
+    let useHttp: boolean;
+    if (isGateway) {
+      if (!this.wsClient.isConnected) {
+        log(TAG, `escalation dropped: gateway agent "${escalationAgent}" selected but WS disconnected`);
+        this.deps.wsHandler.broadcast(
+          `⚠ Gateway disconnected — escalation dropped. Pick a local agent or check the ${escalationAgent} gateway.`,
+          "high",
+        );
+        return;
+      }
+      useHttp = false;
+    } else if (escalationAgent) {
+      useHttp = true;
+    } else {
+      log(TAG, `escalation dropped: lane is Off (escalationAgent="")`);
+      return;
+    }
     if (useHttp) {
+      // Remember the outgoing ID before overwriting so late-arriving responses
+      // still find a valid match in respondHttp's grace window.
+      if (this.httpPending) {
+        this.recentHttpIds.push({ id: this.httpPending.id, ts: this.httpPending.ts });
+        if (this.recentHttpIds.length > Escalator.STALE_ID_BUFFER_SIZE) {
+          this.recentHttpIds.shift();
+        }
+      }
       // Store in HTTP pending slot (newest wins, like EscalationSlot)
       this.httpPending = {
         id: slotId,
@@ -287,13 +364,50 @@ export class Escalator {
         ts: entry.ts,
         feedbackCtx: slotEntry.feedbackCtx,
       };
-      log(TAG, `tick #${entry.id} → httpPending id=${slotId} (transport=${transport})`);
+      log(TAG, `tick #${entry.id} → httpPending id=${slotId} (lane=${escalationAgent || "<default>"})`);
     } else {
       log(TAG, `tick #${entry.id} → slot.insert id=${slotId} depth=${this.slot.depth}`);
       this.slot.insert(slotEntry);
     }
   }
+  /** Redispatch a stale httpPending escalation through the WS slot.
+   *
+   * Called by index.ts when the escalation lane flips to a gateway-typed
+   * agent (e.g., openclaude → openclaw): an escalation queued for HTTP
+   * before the switch is now mis-routed. Rather than letting the bare
+   * agent skip it (which posts a confusing "[skipped: gateway-routed]"
+   * to the user's HUD), we move it into the WS slot so the gateway
+   * actually handles the user's pending question.
+   *
+   * If WS isn't connected, silently clear httpPending — the agent loop
+   * will produce a new escalation through the proper drop-with-toast
+   * path on the next tick. Better than the user seeing the skip message
+   * AND the gateway-disconnect toast for the same logical event.
+   *
+   * The moved entry keeps its original id, message, feedbackCtx and ts;
+   * only sessionKey is read fresh from openclawConfig.
+   *
+   * NOTE(review): the cleared id is not added to recentHttpIds in this
+   * method, so a late respondHttp for it presumably gets no grace-window
+   * match — confirm that is intended.
+   *
+   * Returns true if a redispatch (or clear) actually happened, so the
+   * caller can log meaningfully. Both outcomes return true, so the caller
+   * cannot tell a move from a discard by the return value alone.
+   *
+   * @returns false when there was no httpPending entry; true when one was
+   *   either moved into the WS slot or discarded because WS is down.
+   */
+  redispatchHttpPendingToWs(): boolean {
+    if (!this.httpPending) return false; // nothing queued for HTTP — no-op
+    const stale = this.httpPending;
+    this.httpPending = null; // detach first: the HTTP slot is emptied on both paths below
+    if (!this.wsClient.isConnected) {
+      log(TAG, `redispatch skipped: WS not connected — cleared stale httpPending id=${stale.id}`);
+      return true; // discarded, not redispatched
+    }
+    const slotEntry: SlotEntry = {
+      id: stale.id,
+      message: stale.message,
+      sessionKey: this.deps.openclawConfig.sessionKey,
+      feedbackCtx: stale.feedbackCtx,
+      ts: stale.ts,
+    };
+    log(TAG, `redispatching stale httpPending id=${stale.id} → WS slot (lane switched to gateway)`);
+    this.slot.insert(slotEntry);
+    return true;
+  }
   /** Push fresh SITUATION.md content to the gateway server (fire-and-forget). */
   pushSituationMd(content: string): void {
     if (!this.wsClient.isConnected) return;
@@ -381,11 +495,29 @@ ${recentLines.join("\n")}`;
   /** Respond to an HTTP pending escalation. */
   respondHttp(id: string, response: string): { ok: boolean; error?: string } {
-    if (!this.httpPending) {
-      return { ok: false, error: "no pending escalation" };
-    }
-    if (this.httpPending.id !== id) {
-      return { ok: false, error: `id mismatch: expected ${this.httpPending.id}` };
+    // Grace path: the agent's response arrived for a stale ID because the
+    // analyzer rotated the pending slot mid-flight. Still push to HUD — the
+    // response was written against context that was fresh seconds ago and is
+    // almost certainly still relevant — but don't clear the current pending,
+    // so the agent can still address the newer escalation on its next poll.
+    if (!this.httpPending || this.httpPending.id !== id) {
+      const recent = this.recentHttpIds.find((e) => e.id === id);
+      if (recent && Date.now() - recent.ts < Escalator.STALE_ID_GRACE_MS) {
+        // Push to HUD and return a clean ok=true — don't surface the
+        // grace marker on the wire, because generic LLM clients read
+        // any non-empty `error` field as a failure signal and write
+        // apologetic meta-messages to the user. The breadcrumb stays
+        // in this log for debug.
+        log(TAG, `respondHttp grace: id=${id} is stale (rotated ${((Date.now() - recent.ts) / 1000).toFixed(1)}s ago) — pushing to HUD anyway`);
+        this.pushResponse(response, this.lastEscalationContext);
+        return { ok: true };
+      }
+      return this.httpPending
+        ? { ok: false, error: `id mismatch: expected ${this.httpPending.id}` }
+        : { ok: false, error: "no pending escalation" };
     }
     this.pushResponse(response, this.lastEscalationContext);
@@ -446,7 +578,6 @@ ${recentLines.join("\n")}`;
   getStats(): Record<string, unknown> {
     return {
       mode: this.deps.escalationConfig.mode,
-      transport: this.deps.escalationConfig.transport,
       gatewayConnected: this.wsClient.isConnected,
       circuitOpen: this.wsClient.isCircuitOpen,
       slotDepth: this.slot.depth,
@@ -501,13 +632,42 @@ ${recentLines.join("\n")}`;
     // ★ Broadcast "spawned" BEFORE the RPC — TSK tab shows ··· immediately
     this.broadcastTaskEvent(taskId, "spawned", label, startedAt);
-    if (!this.wsClient.isConnected) {
-      // No OpenClaw gateway — queue for bare agent HTTP polling
+    // Route explicitly by the overlay's spawn-agent selection, which makes
+    // the overlay's choice authoritative. Before openclaw was a roster
+    // option, the old heuristic "if WS connected, use gateway" hijacked
+    // every spawn regardless of user intent, which surfaced as 401/credential
+    // errors from the gateway's stale OpenRouter key.
+    // Per-lane dispatch (mirror of escalation routing above):
+    //   - profile.type === "openclaw" → WS to gateway (drop with toast if
+    //     WS down — bare agent can't run gateway profiles as local CLIs)
+    //   - any other non-empty agent → HTTP queue for bare agent polling
+    //   - empty (Off) → drop, whether or not WS is connected; the spawn poll
+    //     skip in run.sh should already prevent us from getting here.
+    const spawnAgent = this.deps.getSpawnAgent?.() || "";
+    const spawnIsGateway = this.deps.isGatewayAgent?.(spawnAgent) ?? false;
+    if (spawnIsGateway) {
+      if (!this.wsClient.isConnected) {
+        log(TAG, `spawn-task ${taskId}: dropped — gateway agent "${spawnAgent}" selected but WS disconnected`);
+        this.deps.wsHandler.broadcast(
+          `⚠ Gateway disconnected — spawn task dropped. Pick a local agent or check the ${spawnAgent} gateway.`,
+          "high",
+        );
+        return;
+      }
+      // Fall through to gateway dispatch below.
+    } else if (spawnAgent) {
+      // Local bare-agent path: queue for polling.
       this.spawnHttpPending = { id: taskId, task, label: label || "background-task", ts: startedAt };
       const preview = task.length > 60 ? task.slice(0, 60) + "…" : task;
       this.deps.feedBuffer.push(`🔧 Task queued for agent: ${preview}`, "normal", "system", "stream");
       this.deps.wsHandler.broadcast(`🔧 Task queued for agent: ${preview}`, "normal");
-      log(TAG, `spawn-task ${taskId}: WS disconnected — queued for bare agent polling`);
+      log(TAG, `spawn-task ${taskId}: queued for bare agent (lane=${spawnAgent})`);
+      return;
+    } else {
+      log(TAG, `spawn-task ${taskId}: dropped — lane is Off (spawnAgent="")`);
       return;
     }

package/sinain-core/src/index.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import { existsSync } from "node:fs";
 import { loadConfig } from "./config.js";
+import { loadAgentsConfig, isGatewayProfile, gatewayProfileNames } from "./agents-loader.js";
 import { FeedBuffer } from "./buffers/feed-buffer.js";
 import { SenseBuffer } from "./buffers/sense-buffer.js";
 import { WsHandler } from "./overlay/ws-handler.js";
@@ -67,35 +68,66 @@ async function queryKnowledgeFactsMulti(entities: string[], maxFacts: number): P
   ];
   const scriptPath = scriptCandidates.find(p => existsSync(p)) || scriptCandidates[0];
-  const results: string[] = [];
+  // Step 1: Get candidates from Python (RRF-ranked, no embedding — avoids deadlock)
+  // Request 2x candidates in JSON for re-ranking in Node.js
+  const candidateFacts: Array<Record<string, string>> = [];
   for (const dbPath of dbPaths) {
     if (!existsSync(dbPath)) continue;
     try {
-      const args = [scriptPath, "--db", dbPath, "--max-facts", String(maxFacts), "--format", "compact"];
+      const args = [scriptPath, "--db", dbPath, "--max-facts", String(maxFacts * 2), "--format", "json"];
       if (entities.length > 0) args.push("--entities", JSON.stringify(entities));
       const out = execFileSync("python3", args, { timeout: 5000, encoding: "utf-8" }).trim();
-      if (out) results.push(out);
+      if (out) {
+        const parsed = JSON.parse(out);
+        const facts = parsed.facts || parsed;
+        if (Array.isArray(facts)) candidateFacts.push(...facts);
+      }
     } catch { /* skip failed db */ }
   }
-  if (results.length === 0) return "";
-  if (results.length === 1) return results[0];
+  if (candidateFacts.length === 0) return "";
-  // Merge and deduplicate lines from both sources
-  const seen = new Set<string>();
-  const merged: string[] = [];
-  for (const block of results) {
-    for (const line of block.split("\n")) {
-      const key = line.replace(/\(confidence:.*$/, "").trim();
-      if (key && !seen.has(key)) {
-        seen.add(key);
-        merged.push(line);
-      }
+  // Step 2: Re-rank by embedding similarity in-process (no deadlock — model is in this process)
+  const queryText = entities.join(" ");
+  try {
+    if (embeddingService?.ready) {
+      const allTexts = [queryText, ...candidateFacts.map(f => f.value || "")];
+      const embeddings = await embeddingService.embed(allTexts);
+      const queryEmb = embeddings[0];
+      const scored = candidateFacts.map((f, i) => ({
+        fact: f,
+        sim: EmbeddingService.cosine(queryEmb, embeddings[i + 1]),
+      }));
+      scored.sort((a, b) => b.sim - a.sim);
+      candidateFacts.length = 0;
+      candidateFacts.push(...scored.slice(0, maxFacts).map(s => s.fact));
     }
+  } catch { /* embedding unavailable — use RRF order */ }
+  // Step 3: Format as compact text
+  const seen = new Set<string>();
+  const lines: string[] = [];
+  let total = 0;
+  const maxChars = 1200;
+  for (const f of candidateFacts.slice(0, maxFacts)) {
+    const eid = ((f as any).entity_id || (f as any).entityId || "").split(":").pop()?.slice(0, 20) || "?";
+    const value = (f as any).value || "";
+    const conf = (f as any).confidence || "?";
+    const count = (f as any).reinforce_count || "1";
+    const line = `${eid}: ${value} (${conf},${count}x)`;
+    const key = value.slice(0, 60);
+    if (seen.has(key)) continue;
+    seen.add(key);
+    if (total + line.length + 2 > maxChars) break;
+    lines.push(line);
+    total += line.length + 2;
   }
-  return merged.slice(0, maxFacts).join("\n");
+  return lines.join("; ");
 }
+// Reference to embedding service — set during init
+let embeddingService: import("./embedding/service.js").EmbeddingService | null = null;
 /** List all entities from both local and workspace knowledge graphs. */
 async function listKnowledgeEntitiesMulti(max: number): Promise<string> {
   const { execFileSync } = await import("node:child_process");
@@ -340,11 +372,16 @@ async function main() {
     : null;
   // ── Initialize embedding service (non-blocking) ──
-  const embeddingService = new EmbeddingService();
+  embeddingService = new EmbeddingService();
   embeddingService.loadAsync(); // ~9s background load, server starts immediately
   // ── Initialize local knowledge pipeline ──
-  const localCuration = new LocalCurationService();
+  // Pass wsHandler.broadcast so the periodic curator (insight_synthesizer)
+  // can push suggestions/insights directly to HUD without going through the
+  // bare-agent heartbeat. Replaces the old sinain_post_feed MCP roundtrip.
+  const localCuration = new LocalCurationService(
+    (text) => wsHandler.broadcast(text),
+  );
   // Distill pending session in background — don't block server startup
   setImmediate(() => {
     localCuration.distillPendingSession();
@@ -359,6 +396,14 @@ async function main() {
   });
   // ── Initialize escalation ──
+  // getSpawnAgent reads bareAgentState (declared later in this function) via
+  // closure at call-time, NOT at construction time. Safe because
+  // dispatchSpawnTask only fires after an overlay message, which can't
+  // happen before server setup completes.
+  // Load agents.json once for lookup helpers passed to escalator. Same file
+  // config.ts reads at startup; re-loading here keeps the dispatch lookup
+  // contained to a closure (no need to expose agentsCfg through CoreConfig).
+  const escalatorAgentsCfg = loadAgentsConfig();
   const escalator = new Escalator({
     feedBuffer,
     wsHandler,
@@ -367,6 +412,12 @@ async function main() {
     profiler,
     feedbackStore: feedbackStore ?? undefined,
     queryKnowledgeFacts: queryKnowledgeFactsMulti,
+    getSpawnAgent: () => bareAgentState.spawnAgent,
+    getEscalationAgent: () => bareAgentState.escalationAgent,
+    // Type-based gateway lookup. Routing key is agents.json `profiles[name].type`,
+    // so any custom profile with `type: "openclaw"` (e.g. "nemoclaw",
+    // "nanoclaw-prod") gets WS dispatch automatically — no name-matching.
+    isGatewayAgent: (name: string) => isGatewayProfile(escalatorAgentsCfg, name),
   });
   // ── Initialize agent loop (event-driven) ──
@@ -548,6 +599,88 @@ async function main() {
   // ── Escalation pause/resume state ──
   let savedEscalationMode: typeof config.escalationConfig.mode | null = null;
+  /** Pause escalation (idempotent). Caches the pre-pause mode so resume
+   *  can restore it. Re-entering while already paused is a no-op — crucially,
+   *  does NOT overwrite savedEscalationMode with "off".
+   *
+   *  The current mode is read from config.escalationConfig.mode. When it is
+   *  already "off" — for any reason, not only a previous pause — nothing is
+   *  saved and savedEscalationMode is left untouched.
+   *
+   *  NOTE(review): presumably escalator.setMode also updates
+   *  config.escalationConfig.mode, since the idempotence check reads it —
+   *  confirm in Escalator. */
+  function pauseEscalationInternal(): void {
+    const current = config.escalationConfig.mode;
+    if (current === "off") return; // already off: keep whatever mode was saved earlier
+    savedEscalationMode = current; // remembered for resumeEscalationInternal
+    escalator.setMode("off");
+    log(TAG, `escalation paused (was: ${savedEscalationMode})`);
+  }
+  /** Resume escalation (idempotent). Restores the saved mode or falls back
+   *  to "rich" if no saved mode exists. Re-entering while already active
+   *  is a no-op.
+   *
+   *  "Active" means config.escalationConfig.mode is anything other than
+   *  "off". savedEscalationMode is cleared before the switch.
+   *
+   *  NOTE(review): when the mode is "off" and nothing was saved, this
+   *  switches escalation on in "rich" mode — confirm that is wanted for an
+   *  "off" that did not come from a pause. */
+  function resumeEscalationInternal(): void {
+    const current = config.escalationConfig.mode;
+    if (current !== "off") return; // already active — nothing to restore
+    const mode = savedEscalationMode ?? "rich"; // no saved mode → default to "rich"
+    savedEscalationMode = null; // consume the saved value
+    escalator.setMode(mode);
+    log(TAG, `escalation resumed (mode: ${mode})`);
+  }
+  // ── Bare-agent roster & per-lane current agent ──
+  // In-memory only (matches escalation-mode lifecycle). Populated when the
+  // bare agent POSTs /bareagent/register on startup; mutated by set_agent
+  // command from the overlay. Empty-string lane values = "Off" (disabled).
+  // Profile names from agents.json may be custom (e.g. "pclaude",
+  // "openclaude-spawn"), so the server validates by character class
+  // rather than a fixed whitelist. The bare agent owns the source of
+  // truth for which profiles actually exist on its host. The validator
+  // just rejects names that could break shell logging, paths, or be
+  // injection vectors.
+  const AGENT_NAME_RE = /^[a-zA-Z][a-zA-Z0-9_-]{0,63}$/;
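+  // Gateway-typed profiles (and, as a legacy fallback, the literal name
+  // "openclaw") are injected into the roster by registerBareAgent when
+  // gatewayWsUrl is set — they are not local CLIs, they are a routing choice
+  // that sends tasks to the remote OpenClaw gateway via WS RPC instead of
+  // the local bare agent.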
+  const bareAgentState: {
+    available: string[];
+    escalationAgent: string;
+    spawnAgent: string;
+  } = { available: [], escalationAgent: "", spawnAgent: "" };
+  function registerBareAgent(availableList: string[], current: string): void { // rebuilds the roster, re-resolves both lanes, then publishes the state
+    const clean = availableList.filter((a) => typeof a === "string" && AGENT_NAME_RE.test(a)); // drop non-strings and names failing AGENT_NAME_RE
+    // Inject every gateway-style profile (any agents.json profile with
+    // `type: "openclaw"`) into the roster — they have no local binary, so
+    // run.sh's PATH filter drops them, but sinain-core knows they exist
+    // and routes them via WS RPC.
+    //
+    // This generalizes the legacy "auto-inject the literal name 'openclaw'"
+    // behavior: now custom gateway profiles like "nemoclaw" or
+    // "nanoclaw-prod" appear in the overlay roster automatically as soon
+    // as you add them to agents.json. The single WS client uses the first
+    // gateway profile's connection params (config.ts findGatewayProfile);
+    // simultaneous multi-gateway is a follow-up.
+    if (config.openclawConfig.gatewayWsUrl) {
+      for (const gwName of gatewayProfileNames(escalatorAgentsCfg)) {
+        if (!clean.includes(gwName)) clean.push(gwName); // avoid duplicate roster entries
+      }
+      // Legacy fallback: if no gateway profiles are defined in agents.json
+      // but gatewayWsUrl is set via env, still inject the canonical name.
+      if (clean.filter((n) => isGatewayProfile(escalatorAgentsCfg, n)).length === 0
+          && !clean.includes("openclaw")) {
+        clean.push("openclaw"); // NOTE(review): WS routing of this name relies on isGatewayProfile recognizing it — confirm in agents-loader
+      }
+    }
+    bareAgentState.available = clean;
+    // Each lane is resolved independently: keep its agent if it is non-empty
+    // and still in the roster; otherwise adopt the bare agent's reported
+    // `current` when that is in the roster, else the first roster entry,
+    // else "". NOTE(review): a lane set to Off ("") is re-adopted here.
+    if (!bareAgentState.escalationAgent || !clean.includes(bareAgentState.escalationAgent)) {
+      bareAgentState.escalationAgent = clean.includes(current) ? current : (clean[0] ?? "");
+    }
+    if (!bareAgentState.spawnAgent || !clean.includes(bareAgentState.spawnAgent)) {
+      bareAgentState.spawnAgent = clean.includes(current) ? current : (clean[0] ?? "");
+    }
+    wsHandler.updateState({ agents: { ...bareAgentState } }); // shallow copy of the current state
+    log(TAG, `bareagent register: available=[${clean.join(",")}] current=${current} → lanes esc=${bareAgentState.escalationAgent} spawn=${bareAgentState.spawnAgent}`);
+  }
   // ── Create HTTP + WS server ──
   const server = createAppServer({
     config,
@@ -661,6 +794,18 @@ async function main() {
     isEscalationPaused: () => savedEscalationMode !== null,
     respondEscalation: (id: string, response: string) => escalator.respondHttp(id, response),
+    // Bare-agent roster & config (wired to server endpoints in step 2).
+    registerBareAgent,
+    getBareAgentConfig: () => ({
+      escalationAgent: bareAgentState.escalationAgent,
+      spawnAgent: bareAgentState.spawnAgent,
+      // Tells the bare agent whether core still has its roster. On core
+      // restart this flips to false until the next /bareagent/register POST
+      // — distinguishes "user picked Off/Off" (registered=true, lanes="")
+      // from "core forgot about us" (registered=false).
+      registered: bareAgentState.available.length > 0,
+    }),
     // Knowledge graph integration (checks both local and workspace DBs)
     getKnowledgeDocPath: () => {
       // Check local first, then workspace
@@ -683,8 +828,8 @@ async function main() {
     },
     getSpawnPending: () => escalator.getSpawnPending(),
     respondSpawn: (id: string, result: string) => escalator.respondSpawn(id, result),
-    embedTexts: (texts: string[]) => embeddingService.embed(texts),
-    isEmbeddingReady: () => embeddingService.ready,
+    embedTexts: (texts: string[]) => embeddingService!.embed(texts),
+    isEmbeddingReady: () => embeddingService?.ready ?? false,
   });
   // ── Wire overlay profiling ──
@@ -721,20 +866,62 @@ async function main() {
       return screenActive;
     },
     onToggleEscalation: () => {
-      if (savedEscalationMode === null) {
-        // Pause: save current mode, switch to off
-        savedEscalationMode = config.escalationConfig.mode;
-        escalator.setMode("off");
-        log(TAG, `escalation paused (was: ${savedEscalationMode})`);
+      // Routes through the shared helpers so the set_agent("escalation","")
+      // path and the flash-icon-toggle path share a single source of truth
+      // for savedEscalationMode. Kept for WS backward-compat; new UI uses
+      // the agent selector.
+      if (config.escalationConfig.mode === "off") {
+        resumeEscalationInternal();
+        return true;
+      } else {
+        pauseEscalationInternal();
         return false;
+      }
+    },
+    onSetAgent: (lane: "escalation" | "spawn", agent: string): { ok: boolean; error?: string } => {
+      // Empty-string agent = Off (lane disabled). Non-empty agent must be
+      // in the current roster; stale overlay state can send something that
+      // isn't available — reject with a clear error.
+      if (agent !== "" && !bareAgentState.available.includes(agent)) {
+        return { ok: false, error: `Agent "${agent}" not available` };
+      }
+      if (lane === "escalation") {
+        const prevAgent = bareAgentState.escalationAgent;
+        bareAgentState.escalationAgent = agent;
+        if (agent === "") {
+          pauseEscalationInternal();
+        } else {
+          resumeEscalationInternal();
+        }
+        // If the user flipped to a gateway-typed agent (openclaw, nemoclaw,
+        // ...) and there's a stale httpPending escalation queued from BEFORE
+        // the switch, re-dispatch it through the WS path. Without this, the
+        // bare agent picks up the stale entry on its next poll and posts a
+        // "[skipped: gateway-routed]" message to the HUD — confusing for
+        // the user, who just told us "use the gateway".
+        const wasGateway = isGatewayProfile(escalatorAgentsCfg, prevAgent);
+        const isGateway = isGatewayProfile(escalatorAgentsCfg, agent);
+        if (!wasGateway && isGateway) {
+          const did = escalator.redispatchHttpPendingToWs();
+          if (did) log(TAG, `lane switch ${prevAgent || "<empty>"} → ${agent}: stale httpPending redispatched`);
+        }
       } else {
-        // Resume: restore saved mode
-        const mode = savedEscalationMode;
-        savedEscalationMode = null;
-        escalator.setMode(mode);
-        log(TAG, `escalation resumed (mode: ${mode})`);
-        return true;
+        bareAgentState.spawnAgent = agent;
+        // Spawn "off" just means run.sh won't poll /spawn/pending; no
+        // server-side state to flip. Queued spawn tasks TTL out naturally.
       }
+      // Rebroadcast state so the overlay sees the switch immediately, and
+      // the bare agent sees it on its next poll-response config piggyback.
+      // `escalation` field reflects the current escalator mode so the flash
+      // icon's color (active/paused) updates on Off-for-escalation.
+      wsHandler.updateState({
+        agents: { ...bareAgentState },
+        escalation: config.escalationConfig.mode === "off" ? "paused" : "active",
+      });
+      const displayAgent = agent || "off";
+      wsHandler.broadcast(`Agent switched: ${lane} → ${displayAgent}`, "normal", "stream");
+      log(TAG, `set_agent lane=${lane} agent=${displayAgent}`);
+      return { ok: true };
     },
   });