npm - @jhizzard/termdeck - Versions diffs - 0.13.0 → 0.14.0 - Mend

@jhizzard/termdeck 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/package.json +1 -1
package/packages/client/public/app.js +60 -16
package/packages/server/src/agent-adapters/codex.js +199 -0
package/packages/server/src/agent-adapters/gemini.js +158 -0
package/packages/server/src/agent-adapters/grok-models.js +115 -0
package/packages/server/src/agent-adapters/grok.js +253 -0
package/packages/server/src/agent-adapters/index.js +6 -0
package/packages/server/src/index.js +45 -6
package/packages/server/src/rumen-pool-resilience.js +111 -0
package/packages/server/src/session.js +45 -57

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@jhizzard/termdeck",
-  "version": "0.13.0",
+  "version": "0.14.0",
   "description": "Browser-based terminal multiplexer with metadata overlays, panel flashback memory recall, and AI-aware session management",
   "bin": {
     "termdeck": "./packages/cli/src/index.js"

package/packages/client/public/app.js CHANGED Viewed

@@ -10,6 +10,14 @@
       layout: '2x1',
       themes: {},
       config: {},
+      // Sprint 45 T4: serializable projection of the multi-agent registry
+      // (server's AGENT_ADAPTERS). Populated from GET /api/agent-adapters
+      // during init(). The launcher's command-shorthand parser reads this
+      // to detect which adapter (if any) a typed command should map to.
+      // Fallback list is the pre-Sprint-45 default so the launcher still
+      // works if the endpoint 404s on an older server during a rolling
+      // upgrade — Claude only, anchored binary match.
+      agentAdapters: [{ name: 'claude', sessionType: 'claude-code', binary: 'claude', costBand: 'pay-per-token' }],
       focusedId: null
     };
@@ -27,6 +35,17 @@
       state.config = await api('GET', '/api/config');
       updateRagIndicator();
+      // Sprint 45 T4: fetch the multi-agent adapter registry projection.
+      // Drives the launcher's command-shorthand → sessionType resolution
+      // below in launchTerminal(). Falls back to the bootstrap default
+      // (Claude only) if the endpoint isn't available on this server.
+      try {
+        const adapters = await api('GET', '/api/agent-adapters');
+        if (Array.isArray(adapters) && adapters.length > 0) {
+          state.agentAdapters = adapters;
+        }
+      } catch (_) { /* keep bootstrap fallback */ }
       // Populate project dropdown
       const sel = document.getElementById('promptProject');
       for (const name of Object.keys(state.config.projects || {})) {
@@ -2460,29 +2479,54 @@
         return;
       }
-      // Parse shorthand commands
+      // Sprint 45 T4: registry-driven shorthand resolution. Pre-Sprint-45
+      // had hardcoded claude/cc/gemini/python branches here; now the type
+      // detection consults state.agentAdapters (loaded from
+      // /api/agent-adapters at init), and only the Claude `cc` alias and
+      // the python-server detection (no adapter exists) stay as
+      // special-cases below. Adapter matching uses an anchored prefix on
+      // the adapter's binary name (`^binary\b`, case-insensitive) which
+      // fits all four Sprint-45 adapters (claude / codex / gemini / grok)
+      // since each binary is uniquely named.
       let resolvedCommand = command;
       let resolvedType = 'shell';
       let resolvedCwd = undefined;
       let resolvedProject = project || undefined;
-      if (/^claude\b/i.test(command) || /^cc\b/i.test(command)) {
-        resolvedType = 'claude-code';
-        const argMatch = command.match(/(?:claude|cc)\s+(?:code\s+)?(.+)/i);
-        if (argMatch) {
-          const arg = argMatch[1].trim();
-          // Check if arg is a known project name
-          if (state.config.projects && state.config.projects[arg]) {
-            resolvedProject = arg;
-          } else {
-            resolvedCwd = arg;
+      // Claude `cc` alias normalization. Documented Claude shorthand —
+      // does not generalize to other adapters, so it stays in client UX,
+      // not in the server-side adapter contract.
+      let canonical = command;
+      if (/^cc\b/i.test(canonical)) {
+        canonical = canonical.replace(/^cc\b/i, 'claude');
+      }
+      const adapter = (state.agentAdapters || []).find((a) =>
+        a && a.binary && new RegExp(`^${a.binary}\\b`, 'i').test(canonical)
+      );
+      if (adapter) {
+        resolvedType = adapter.sessionType;
+        // Claude shorthand: `claude <project-or-cwd>` rewrites to `claude`
+        // and routes the trailing arg into either the project dropdown
+        // (if it's a known project name) or the cwd parameter. Other
+        // adapters' arg-parsing — codex sub-commands, gemini -p flag,
+        // grok --model — pass through unchanged via resolvedCommand.
+        if (adapter.name === 'claude') {
+          const argMatch = canonical.match(/^claude\s+(?:code\s+)?(.+)/i);
+          if (argMatch) {
+            const arg = argMatch[1].trim();
+            if (state.config.projects && state.config.projects[arg]) {
+              resolvedProject = arg;
+            } else {
+              resolvedCwd = arg;
+            }
           }
+          resolvedCommand = adapter.binary;
         }
-        resolvedCommand = 'claude';
-      } else if (/^gemini\b/i.test(command)) {
-        resolvedType = 'gemini';
-      } else if (/^python3?\b.*(?:runserver|uvicorn|flask|gunicorn)/i.test(command)) {
+      } else if (/^python3?\b.*(?:runserver|uvicorn|flask|gunicorn)/i.test(canonical)) {
+        // python-server is a server SUBTYPE for status badges, not an
+        // agent adapter. No registry entry for it; detection stays here.
         resolvedType = 'python-server';
       }

package/packages/server/src/agent-adapters/codex.js ADDED Viewed

@@ -0,0 +1,199 @@
+// Codex CLI adapter — Sprint 45 T1
+//
+// Second adapter in the AGENT_ADAPTERS registry (see ./index.js). Sprint 44 T3
+// shipped the Claude adapter as the reference implementation; this file is the
+// recipe in `docs/AGENT-RUNTIMES.md` § 6 turned into running code for Codex
+// CLI (`/usr/local/bin/codex`, v0.125.0 verified 2026-05-01).
+//
+// This is *Codex-as-its-own-panel* — distinct from the existing
+// `codex@openai-codex` Claude Code plugin which is a delegate-from-Claude
+// pathway. Sprint 46 wires per-lane agent assignment; this lane just makes
+// `codex` work end-to-end inside a TermDeck panel: type detection, status
+// badge, transcript ingestion into Mnestra.
+//
+// Contract — see ./claude.js header for the full annotated shape.
+//
+// Pattern provenance:
+// • Codex CLI ships a Ratatui (Rust) TUI. The TUI redraws on each turn so the
+//   raw PTY stream is heavy in ANSI escapes; session.js stripAnsi() runs
+//   *before* these regexes, so the patterns assume cleaned text.
+// • The headless `codex exec` mode emits a documented sequence: a `--------`
+//   header block, `user` / `codex` speaker lines on their own row, function
+//   `exec_command` blocks, and a `tokens used` footer. The TUI mirrors these
+//   speaker shapes inside its rendered chat surface.
+// • Reasoning markers come from the JSONL `response_item.payload.type=reasoning`
+//   events that the TUI renders as a "Thinking…" status line.
+// • Apply-patch / exec markers come from `response_item.payload.type=function_call`
+//   entries with names like `apply_patch` and `exec_command`.
+//
+// Patterns are conservative defaults — Sprint 45 T4 / Sprint 46 will tune
+// against captured real-world TUI output. Snapshot tests in
+// tests/agent-adapter-codex.test.js pin the current behavior so any tuning
+// is an explicit, reviewed change.
+// ──────────────────────────────────────────────────────────────────────────
+// Patterns
+// ──────────────────────────────────────────────────────────────────────────
+// Codex prompt detection. Three shapes accepted:
+//   1. `codex>` literal (mirrors gemini's `gemini>` and the codex CLI's REPL
+//      prompt convention — used by `codex resume` interactive sessions).
+//   2. A bare `codex` line (the speaker label the TUI prints above an
+//      assistant turn AND that headless `codex exec` prints before the reply).
+//   3. The `--------` divider that wraps the codex header block in headless
+//      mode and bookends turns in the TUI.
+const PROMPT = /^(?:codex>\s|codex\s*$|--------\s*$)/m;
+// Reasoning indicator. Codex's TUI status line shows "Thinking" while the
+// model reasons; "Reasoning" appears in some headless transcripts; "Working"
+// is what `codex exec` prints for tool-loop progress.
+// Unlike the patterns below, this one is not line-anchored: the word-boundary
+// match fires anywhere in the chunk, so incidental output such as
+// "Working directory" also satisfies it.
+const THINKING = /\b(Thinking|Reasoning|Working)\b/;
+// File edit / patch markers. Codex applies diffs through the `apply_patch`
+// tool which the TUI renders as `Apply patch <file>` headers. Plain
+// Edit/Create/Update/Delete shapes are also kept so simple file ops register
+// (mirrors the Claude adapter's editing markers for cross-adapter parity).
+// EDITING only detects the keyword; EDITING_DETAIL additionally captures the
+// rest of the line (group 2) for the status detail string.
+const EDITING = /^(Apply patch|Edit|Create|Update|Delete|Modified)\s/m;
+const EDITING_DETAIL = /^(Apply patch|Edit|Create|Update|Delete|Modified)\s+(.+)$/m;
+// Tool / shell-exec markers. Codex's TUI prefixes shell commands with `$`
+// (chat-shell convention), arrow `→` for read tool calls, and bare keywords
+// `exec` / `Running` / `Calling` for the phase between dispatch and result.
+// `exec_command` is Codex's function-call name (verified in rollout JSONL
+// 2026-05-01); the alternation handles both bare `exec` and the underscored
+// `exec_command` shape (the underscore is a word character so `exec\b`
+// alone wouldn't match `exec_command`).
+const TOOL = /^(?:\$\s|→\s|exec(?:_command\b|\b)|Running\b|Calling\b)/m;
+// Idle / waiting-for-input. The TUI returns to the bare `codex` speaker
+// label when it's done reasoning and waiting on the user.
+// Same expression as the second alternative of PROMPT above.
+const IDLE = /^codex\s*$/m;
+// Error patterns — line-anchored to avoid mid-line "error" mentions in tool
+// output (grep results, test logs, file dumps) flagging false positives.
+// Same shape as Claude with codex-specific OpenAI-API failure modes added
+// (rate-limit 429, model-not-found, invalid_api_key) which surface as visible
+// strings in Codex's error reporting and would otherwise slip through.
+// The trailing \b applies to every keyword alternative in the inner group, so
+// "rate limit" matches but "rate limited" does not; `npm ERR!` sits outside
+// that group and carries no boundary check.
+const ERROR = /^\s*(?:(?:error|Error|ERROR|exception|Exception|Traceback|fatal|Fatal|FATAL|segmentation fault|panic|EACCES|ECONNREFUSED|ENOENT|command not found|undefined reference|cannot find module|failed with exit code|No such file or directory|Permission denied|429\s+Too Many Requests|rate.?limit|invalid_api_key|model_not_found|insufficient_quota)\b|npm ERR!)/m;
+// ──────────────────────────────────────────────────────────────────────────
+// statusFor — Codex panel status. Order mirrors Claude's cascade:
+// thinking → editing → tool → idle. First match wins.
+// ──────────────────────────────────────────────────────────────────────────
+// Maps one chunk of ANSI-stripped Codex output to a panel status.
+// Checks run in priority order (thinking, editing, tool, idle) and the first
+// hit wins; returns null when nothing matches so the caller keeps the
+// previous status.
+function statusFor(data) {
+  if (THINKING.test(data)) return { status: 'thinking', statusDetail: 'Codex is reasoning...' };
+
+  if (EDITING.test(data)) {
+    // EDITING_DETAIL can miss even when EDITING hits (keyword followed only
+    // by whitespace), hence the generic fallback label.
+    const detail = data.match(EDITING_DETAIL);
+    const statusDetail = detail ? `${detail[1]} ${detail[2]}` : 'Editing files';
+    return { status: 'editing', statusDetail };
+  }
+
+  if (TOOL.test(data)) return { status: 'active', statusDetail: 'Using tools' };
+
+  return IDLE.test(data)
+    ? { status: 'idle', statusDetail: 'Waiting for input' }
+    : null;
+}
+// ──────────────────────────────────────────────────────────────────────────
+// parseTranscript — Codex JSONL format.
+//
+// Each line is `{ timestamp, type, payload }`. We want only:
+//   type === 'response_item' && payload.type === 'message'
+// with payload.role in {user, assistant}. The 'developer' role carries the
+// permissions/sandbox prelude — skip. `event_msg` lines duplicate the
+// canonical message channel and additionally carry exec_command_end shell
+// output blocks — skip too.
+//
+// content is an array of { type: 'input_text' | 'output_text', text: string }
+// (sometimes plain `text`). Joined with spaces and truncated to 400 chars
+// per message (same cut-off Claude uses).
+// ──────────────────────────────────────────────────────────────────────────
+// Extracts user/assistant chat messages from a Codex rollout (JSONL text).
+// Returns an array of { role, content } with content capped at 400 chars.
+// Non-string or empty input yields []; lines that are not valid JSON, are not
+// `response_item` message records, or carry another role are ignored.
+function parseTranscript(raw) {
+  if (typeof raw !== 'string' || raw.length === 0) return [];
+
+  const TEXT_PART_TYPES = ['input_text', 'output_text', 'text'];
+  const collected = [];
+
+  raw.split('\n').forEach((row) => {
+    if (!row) return; // blank line
+
+    let record;
+    try { record = JSON.parse(row); } catch (_) { return; }
+    if (!record || record.type !== 'response_item') return;
+
+    const payload = record.payload;
+    if (!payload || payload.type !== 'message') return;
+
+    const { role, content } = payload;
+    if (role !== 'user' && role !== 'assistant') return;
+
+    // content is either a plain string or an array of typed text parts.
+    let text = '';
+    if (typeof content === 'string') {
+      text = content;
+    } else if (Array.isArray(content)) {
+      const pieces = [];
+      for (const part of content) {
+        if (part && TEXT_PART_TYPES.includes(part.type)) pieces.push(part.text || '');
+      }
+      text = pieces.join(' ');
+    }
+
+    if (text) collected.push({ role, content: text.slice(0, 400) });
+  });
+
+  return collected;
+}
+// ──────────────────────────────────────────────────────────────────────────
+// bootPromptTemplate — Codex variant of the Claude scaffold. Step 3 still
+// reads the global ~/.claude/CLAUDE.md, but points at ./AGENTS.md (Codex's
+// instructional file) for the project-level instructions. Sprint 46 T2
+// will refine per-agent prompts; this is the placeholder so the contract is
+// uniform across all four adapters.
+// ──────────────────────────────────────────────────────────────────────────
+// Builds the newline-joined boot prompt for a Codex lane.
+// lane:   { id, project, briefingPath } — all optional.
+// sprint: { number, name, project }     — all optional.
+// Missing values fall back to 'T?', '?', 'unnamed' and 'termdeck'; the
+// briefing path defaults to a placeholder inside the sprint's docs folder.
+function bootPromptTemplate(lane = {}, sprint = {}) {
+  const laneId = lane.id || 'T?';
+  const number = sprint.number || '?';
+  const title = sprint.name || 'unnamed';
+  const memoryProject = lane.project || sprint.project || 'termdeck';
+  const sprintDir = `docs/sprint-${number}-${title}`;
+  const briefingFile = lane.briefingPath || `${sprintDir}/${laneId}-<lane>.md`;
+
+  const header = `You are ${laneId} in Sprint ${number} (${title}). Boot sequence:`;
+  const steps = [
+    `1. memory_recall(project="${memoryProject}", query="<topic>")`,
+    `2. memory_recall(query="<broader topic>")`,
+    `3. Read ~/.claude/CLAUDE.md and ./AGENTS.md`,
+    `4. Read ${sprintDir}/PLANNING.md`,
+    `5. Read ${sprintDir}/STATUS.md`,
+    `6. Read ${briefingFile}`,
+  ];
+  const closing = [
+    '',
+    'Then begin. Stay in your lane. Post FINDING / FIX-PROPOSED / DONE in STATUS.md.',
+    "Don't bump versions, don't touch CHANGELOG, don't commit.",
+  ];
+
+  return [header, ...steps, ...closing].join('\n');
+}
+// Registry entry for the Codex CLI: identity, command matcher, spawn
+// settings, output patterns, and the three behavior hooks defined above.
+const codexAdapter = {
+  name: 'codex',
+  sessionType: 'codex',
+  // Word-bounded, case-insensitive, but not anchored: true for any command
+  // string that contains `codex` as a whole word, including as an argument
+  // or file name (e.g. `cat codex.md`).
+  matches: (cmd) => typeof cmd === 'string' && /\bcodex\b/i.test(cmd),
+  spawn: {
+    binary: 'codex',
+    defaultArgs: [],
+    // Read from process.env once, when this module is loaded; the value is
+    // `undefined` if the variable is unset at that moment.
+    // NOTE(review): how spawn.env is merged into the PTY environment is not
+    // visible here — confirm an undefined entry cannot shadow a key set later.
+    env: { OPENAI_API_KEY: process.env.OPENAI_API_KEY },
+  },
+  patterns: {
+    prompt: PROMPT,
+    thinking: THINKING,
+    editing: EDITING,
+    tool: TOOL,
+    idle: IDLE,
+    error: ERROR,
+  },
+  // Label associated with this adapter's own error pattern.
+  patternNames: {
+    error: 'codexErrorLineStart',
+  },
+  statusFor,
+  parseTranscript,
+  bootPromptTemplate,
+  costBand: 'pay-per-token',
+};
+module.exports = codexAdapter;

package/packages/server/src/agent-adapters/gemini.js ADDED Viewed

@@ -0,0 +1,158 @@
+// Gemini CLI adapter — Sprint 45 T2
+//
+// Lifts the previously-hardcoded gemini logic out of session.js into the
+// AGENT_ADAPTERS registry alongside the Claude adapter shipped in Sprint 44
+// T3. Behavior is bit-for-bit identical to the pre-Sprint-45 inline path:
+// same `^gemini>` prompt regex, same `Generating|Working` thinking regex,
+// same status strings ("Gemini is generating..." / "Waiting for input"),
+// same loose `/gemini/i` command-string match. parseTranscript is the new
+// capability — Gemini sessions previously didn't write to Mnestra because
+// the memory hook assumed Claude JSONL.
+//
+// Contract — see ./claude.js header for the full 7-field shape.
+//
+// Patterns intentionally omit `error`. The fallback in session.js
+// `_detectErrors` (`adapter.patterns.error || PATTERNS.error`) lets generic
+// prose-shape error detection continue to apply to Gemini sessions, which
+// matches the pre-Sprint-45 behavior. Sprint 46+ can layer in a Gemini-
+// specific line-anchored error pattern once we've observed enough TUI
+// output to know what false positives to dodge.
+// ──────────────────────────────────────────────────────────────────────────
+// Patterns — verbatim regexes lifted from session.js's PATTERNS.geminiCli
+// (lines 47-50). Reference-equal preservation matters because session.js
+// keeps a `PATTERNS.geminiCli` shim that points back at these regex
+// objects, the same way `PATTERNS.claudeCode.*` shimmed Sprint 44 T3.
+// ──────────────────────────────────────────────────────────────────────────
+// Idle prompt: a line that begins with `gemini>` followed by whitespace. The
+// `m` flag lets `^` match at any line start within the chunk.
+const PROMPT = /^gemini>\s/m;
+// Progress words, matched on word boundaries anywhere in the chunk (no line
+// anchor).
+const THINKING = /\b(Generating|Working)\b/;
+// ──────────────────────────────────────────────────────────────────────────
+// statusFor — replaces the `case 'gemini':` block of _updateStatus. Order
+// matches the legacy switch's `if/else if` cascade exactly: thinking wins,
+// then prompt → idle. No editing/tool/error branches in the legacy switch,
+// so statusFor has none either; null returns leave the status untouched
+// just like the legacy fall-through.
+// ──────────────────────────────────────────────────────────────────────────
+// Maps one chunk of Gemini output to a panel status. A progress word wins
+// over a prompt in the same chunk; null means "no change".
+function statusFor(data) {
+  if (THINKING.test(data)) return { status: 'thinking', statusDetail: 'Gemini is generating...' };
+
+  return PROMPT.test(data)
+    ? { status: 'idle', statusDetail: 'Waiting for input' }
+    : null;
+}
+// ──────────────────────────────────────────────────────────────────────────
+// parseTranscript — Gemini CLI session JSON format (NOT JSONL).
+//
+// Captured shape (from `gemini -p "say hi"` 2026-05-01):
+//   {
+//     sessionId, projectHash, startTime, lastUpdated, kind,
+//     messages: [
+//       { id, timestamp, type: 'user',   content: [{ text: '...' }] },
+//       { id, timestamp, type: 'gemini', content: '...', thoughts, tokens, model },
+//       ...
+//     ]
+//   }
+//
+// The user role carries a content ARRAY of `{text}` parts; the gemini
+// (assistant) role carries a STRING. We normalize both to the Claude
+// adapter's output shape — `{ role: 'user'|'assistant', content: string }`
+// truncated to 400 chars — so the memory-hook summary builder doesn't have
+// to branch on adapter type.
+//
+// `type: 'gemini'` maps to `role: 'assistant'` for cross-adapter parity.
+// ──────────────────────────────────────────────────────────────────────────
+// Converts a Gemini CLI session file (one JSON document with a `messages`
+// array) into [{ role, content }] entries, content capped at 400 chars.
+// Returns [] for non-string/empty input, unparseable JSON, or a document
+// without a `messages` array. Entries whose type is not user / gemini /
+// assistant, or whose text comes out empty, are dropped.
+function parseTranscript(raw) {
+  if (typeof raw !== 'string' || raw.length === 0) return [];
+
+  let parsed;
+  try { parsed = JSON.parse(raw); } catch (_) { return []; }
+  if (!parsed || !Array.isArray(parsed.messages)) return [];
+
+  // `gemini` is the CLI's own label for the assistant side.
+  const roleOf = (type) => {
+    if (type === 'user') return 'user';
+    if (type === 'gemini' || type === 'assistant') return 'assistant';
+    return null;
+  };
+
+  // A string is used as-is; an array contributes the string `text` of each part.
+  const flatten = (content) => {
+    if (typeof content === 'string') return content;
+    if (!Array.isArray(content)) return '';
+    const pieces = [];
+    for (const part of content) {
+      if (part && typeof part.text === 'string') pieces.push(part.text);
+    }
+    return pieces.join(' ');
+  };
+
+  const collected = [];
+  parsed.messages.forEach((item) => {
+    if (!item || typeof item !== 'object') return;
+    const role = roleOf(item.type);
+    if (!role) return;
+    const text = flatten(item.content);
+    if (text) collected.push({ role, content: text.slice(0, 400) });
+  });
+  return collected;
+}
+// ──────────────────────────────────────────────────────────────────────────
+// bootPromptTemplate — placeholder mirroring the Claude adapter's shape.
+// Points at GEMINI.md (the auto-generated mirror of CLAUDE.md per Sprint 44
+// T2's sync-agent-instructions.js script). Sprint 46 T2 will refine the
+// per-agent boot prompt — Gemini doesn't have Claude's `memory_recall` MCP
+// tool out-of-the-box, so the lane brief shape may need agent-specific
+// scaffolding. The placeholder here keeps the contract complete.
+// ──────────────────────────────────────────────────────────────────────────
+// Builds the newline-joined boot prompt for a Gemini lane.
+// lane:   { id, project, briefingPath } — all optional.
+// sprint: { number, name, project }     — all optional.
+// Missing values fall back to 'T?', '?', 'unnamed' and 'termdeck'; the
+// briefing path defaults to a placeholder inside the sprint's docs folder.
+function bootPromptTemplate(lane = {}, sprint = {}) {
+  const laneId = lane.id || 'T?';
+  const number = sprint.number || '?';
+  const title = sprint.name || 'unnamed';
+  const memoryProject = lane.project || sprint.project || 'termdeck';
+  const sprintDir = `docs/sprint-${number}-${title}`;
+  const briefingFile = lane.briefingPath || `${sprintDir}/${laneId}-<lane>.md`;
+
+  const header = `You are ${laneId} in Sprint ${number} (${title}). Boot sequence:`;
+  const steps = [
+    `1. memory_recall(project="${memoryProject}", query="<topic>")`,
+    `2. memory_recall(query="<broader topic>")`,
+    `3. Read ~/.claude/CLAUDE.md and ./GEMINI.md`,
+    `4. Read ${sprintDir}/PLANNING.md`,
+    `5. Read ${sprintDir}/STATUS.md`,
+    `6. Read ${briefingFile}`,
+  ];
+  const closing = [
+    '',
+    'Then begin. Stay in your lane. Post FINDING / FIX-PROPOSED / DONE in STATUS.md.',
+    "Don't bump versions, don't touch CHANGELOG, don't commit.",
+  ];
+
+  return [header, ...steps, ...closing].join('\n');
+}
+// Registry entry for the Gemini CLI: identity, command matcher, spawn
+// settings, output patterns, and the three behavior hooks defined above.
+const geminiAdapter = {
+  name: 'gemini',
+  sessionType: 'gemini',
+  // Case-insensitive substring test with no anchor or word boundary: true for
+  // any command string that contains "gemini" anywhere.
+  matches: (cmd) => typeof cmd === 'string' && /gemini/i.test(cmd),
+  spawn: {
+    binary: 'gemini',
+    defaultArgs: [],
+    // GEMINI_API_KEY is read via `process.env` at spawn time by index.js'
+    // PTY env merge — declared here for documentation / discoverability,
+    // not for in-adapter overriding. OAuth-personal is the typical auth
+    // path (settings.json `security.auth.selectedType: 'oauth-personal'`).
+    env: {},
+  },
+  patterns: {
+    prompt: PROMPT,
+    thinking: THINKING,
+    // editing / tool / error intentionally omitted — see header comment.
+  },
+  patternNames: {
+    // No adapter-owned error pattern → session.js falls back to the
+    // generic `PATTERNS.error` and the `'error'` diag label, which is
+    // exactly what gemini-typed sessions saw pre-Sprint-45.
+  },
+  statusFor,
+  parseTranscript,
+  bootPromptTemplate,
+  costBand: 'pay-per-token',
+};
+module.exports = geminiAdapter;

package/packages/server/src/agent-adapters/grok-models.js ADDED Viewed

@@ -0,0 +1,115 @@
+// Grok model selection — Sprint 45 T3
+//
+// `grok-dev` (the superagent-ai CLI) ships an 11-model lineup spanning
+// $0.2/$0.5 cheap-fast tiers up to $3/$15 flagship. The wrong default
+// silently 10x's a bill on routine tasks: a "look at this file and tell me
+// what's wrong" lane on `grok-4.20-0309-reasoning` (Heavy, $2/$6) costs the
+// same as ten lanes on `grok-4-1-fast-non-reasoning`. The orchestrator picks
+// per-lane via `chooseModel(taskHint)` at boot-prompt construction time
+// (see SPRINT-45-PREP-NOTES.md § "Concern 2: Model selection heuristic").
+// The adapter's `spawn.env.GROK_MODEL` defaults to the cheap-fast model and
+// is overridden per-lane by the launcher.
+//
+// Tier table (price = USD per 1M tokens, in/out):
+//
+//   tier               | model id                          | price    | use case
+//   ───────────────────┼───────────────────────────────────┼──────────┼──────────────────────
+//   fast-non-reasoning | grok-4-1-fast-non-reasoning       | $0.2/0.5 | DEFAULT — routine
+//   fast-reasoning     | grok-4-1-fast-reasoning           | $0.2/0.5 | light CoT under budget
+//   code               | grok-code-fast-1                  | $0.2/1.5 | code gen / refactor
+//   reasoning-deep     | grok-4.20-0309-reasoning          | $2/6     | hard problems, audit
+//   reasoning-non-cot  | grok-4.20-0309-non-reasoning      | $2/6     | high-quality non-CoT
+//   multi-agent        | grok-4.20-multi-agent-0309        | $2/6     | parallel sub-agent fan-out
+//   flagship           | grok-4-0709                       | $3/15    | when Heavy isn't enough
+//   budget-compact     | grok-3-mini                       | $0.3/0.5 | rare — usually wrong
+//
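+// `grok-4-fast-non-reasoning`, `grok-4-fast-reasoning`, and `grok-3` are
+// legacy ids retained for completeness. They have no `case` of their own in
+// chooseModel's switch; its default branch resolves them through
+// LEGACY_ALIASES (which also accepts `grok-beta` and `grok-4.20-multi-agent`).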
+'use strict';
+// Canonical model ids. Use the symbolic key in code; the heuristic resolves
+// to the live id below. Keep these as data, not constants — Sprint 46+ may
+// gain a `taskHint -> model` override file in `~/.termdeck/`.
+// Symbolic tier key → concrete model id.
+const MODELS = {
+  'fast-non-reasoning': 'grok-4-1-fast-non-reasoning',
+  'fast-reasoning': 'grok-4-1-fast-reasoning',
+  'code': 'grok-code-fast-1',
+  'reasoning-deep': 'grok-4.20-0309-reasoning',
+  'reasoning-non-cot': 'grok-4.20-0309-non-reasoning',
+  'multi-agent': 'grok-4.20-multi-agent-0309',
+  'flagship': 'grok-4-0709',
+  'budget-compact': 'grok-3-mini',
+};
+// Legacy aliases — accepted as input to chooseModel for back-compat with
+// Joshua's earlier `grok models` outputs. Resolution table:
+// Values are read from MODELS once, when this object literal is evaluated;
+// later changes to MODELS entries are not reflected here.
+// NOTE(review): 'grok-3' resolves to the flagship tier, the highest-priced
+// entry in the tier table — confirm that mapping is intended.
+const LEGACY_ALIASES = {
+  'grok-4-fast-non-reasoning': MODELS['fast-non-reasoning'],
+  'grok-4-fast-reasoning': MODELS['fast-reasoning'],
+  'grok-beta': MODELS['reasoning-deep'],
+  'grok-4.20-multi-agent': MODELS['multi-agent'],
+  'grok-3': MODELS['flagship'],
+};
+// chooseModel — orchestrator-side heuristic. Pass `taskHint` from the lane
+// brief (Sprint 46 frontmatter `model-hint: code|reasoning-deep|...`) or omit
+// for the cheap-fast default. Unknown hints fall back to the default rather
+// than throwing — the bill consequence of a typo silently routing to Heavy
+// is worse than the latency hit of cheap-fast on a hard task.
+// Resolves a lane's task hint to a concrete Grok model id.
+//
+// taskHint: a symbolic tier key from MODELS, the extra alias
+//   'reasoning-quick', or a legacy model id listed in LEGACY_ALIASES.
+// Returns: the model id string. Missing, empty, or unrecognized hints all
+//   resolve to the cheap-fast default; this function never throws for an
+//   unknown hint.
+function chooseModel(taskHint) {
+  switch (taskHint) {
+    case 'code':
+      return MODELS.code;
+    case 'multi-agent':
+      return MODELS['multi-agent'];
+    case 'reasoning-deep':
+      return MODELS['reasoning-deep'];
+    case 'reasoning-quick':
+    case 'fast-reasoning':
+      return MODELS['fast-reasoning'];
+    case 'reasoning-non-cot':
+      return MODELS['reasoning-non-cot'];
+    case 'flagship':
+      return MODELS.flagship;
+    case 'budget-compact':
+      return MODELS['budget-compact'];
+    case 'fast-non-reasoning':
+    case undefined:
+    case null:
+    case '':
+      return MODELS['fast-non-reasoning'];
+    default:
+      // Accept legacy aliases verbatim; otherwise fall back to cheap-fast.
+      // Own-property check: a bare `LEGACY_ALIASES[taskHint]` lookup also
+      // sees Object.prototype, so hints such as 'constructor' or 'toString'
+      // would return an inherited function instead of a model id.
+      if (Object.prototype.hasOwnProperty.call(LEGACY_ALIASES, taskHint)) {
+        return LEGACY_ALIASES[taskHint];
+      }
+      return MODELS['fast-non-reasoning'];
+  }
+}
+// getModelInfo — for the launcher / dashboard cost annotations (Sprint 46).
+// Returns the price band so the UI can render a $-tier indicator alongside
+// the model name without each caller knowing the table.
+// Looks up the price band for a concrete model id.
+// Returns { tier, priceIn, priceOut } with prices in USD per 1M tokens; ids
+// that are not one of the MODELS values get tier 'unknown' and null prices.
+function getModelInfo(modelId) {
+  switch (modelId) {
+    case MODELS['fast-non-reasoning']:
+    case MODELS['fast-reasoning']:
+    case MODELS.code:
+      // The code model shares the cheap input price but has a higher output price.
+      return { tier: 'cheap', priceIn: 0.2, priceOut: modelId === MODELS.code ? 1.5 : 0.5 };
+    case MODELS['reasoning-deep']:
+    case MODELS['reasoning-non-cot']:
+    case MODELS['multi-agent']:
+      return { tier: 'heavy', priceIn: 2, priceOut: 6 };
+    case MODELS.flagship:
+      return { tier: 'flagship', priceIn: 3, priceOut: 15 };
+    case MODELS['budget-compact']:
+      return { tier: 'budget', priceIn: 0.3, priceOut: 0.5 };
+    default:
+      return { tier: 'unknown', priceIn: null, priceOut: null };
+  }
+}
+// Public surface: the two lookup tables plus the two resolver helpers.
+module.exports = {
+  MODELS,
+  LEGACY_ALIASES,
+  chooseModel,
+  getModelInfo,
+};

package/packages/server/src/agent-adapters/grok.js ADDED Viewed

@@ -0,0 +1,253 @@
+// Grok adapter (superagent-ai grok-dev CLI) — Sprint 45 T3
+//
+// Implements the 7-field adapter contract documented in ./claude.js and
+// docs/AGENT-RUNTIMES.md § 5. TUI mode by default — conversation persists
+// inside the PTY process for the lifetime of the panel, matching the Claude
+// Code pattern. Headless `grok --prompt` is reserved for orchestrator
+// background tasks (Sprint 46+) and is NOT this adapter's spawn shape.
+//
+// Lane-time empirical findings (Sprint 45 T3, 2026-05-01) — see
+// docs/multi-agent-substrate/SPRINT-45-PREP-NOTES.md and Sprint 45 STATUS.md
+// for the full investigation:
+//
+//   • grok-dev v1.1.5, binary `/usr/local/bin/grok` (#!/usr/bin/env bun)
+//   • Session storage: SQLite at ~/.grok/grok.db, NOT JSON files in
+//     ~/.grok/sessions/. Tables (STRICT, requires SQLite ≥3.37):
+//       sessions(id, workspace_id, title, model, mode, status, created_at, ...)
+//       messages(session_id, seq, role, message_json, created_at)
+//       tool_calls, tool_results, usage_events, compactions
+//     `messages.message_json` is a JSON blob in AI SDK provider shape:
+//       { role: 'user'|'assistant'|'tool', content: string | Array<...> }
+//     where array parts are { type: 'text', text } | { type: 'tool-call', ... }
+//     | { type: 'tool-result', ... }. Sprint 45 T4 wires the memory hook to
+//     extract from grok.db and feed parseTranscript a JSON envelope.
+//
+//   • TUI shimmer text strings (the canonical "thinking" indicator):
+//       "Planning next moves"  — default isProcessing without stream content
+//       "Generating plan..."   — plan-mode label
+//       "Answering…"           — /btw overlay
+//   • Tool indicators: TUI renders `→ <label>` (InlineTool component);
+//     headless mode emits `▸ <label>`. Both forms accepted.
+//   • Sub-agents: 5 built-in (general / explore / vision / verify / computer)
+//     plus up to 12 user-defined customs on grok-4.20-multi-agent-0309
+//     (16-agent ceiling). Sub-agent fan-out is internal to grok-dev — the
+//     adapter doesn't need to surface per-sub-agent status; the parent CLI
+//     emits SubagentTaskLine entries that show through as inline tool calls.
+//   • Empty-state placeholder: "Message Grok…" — used only as a weak idle
+//     hint, not a load-bearing pattern.
+//
+// Cost band: 'subscription'. Joshua's SuperGrok Heavy carries the rate
+// limits; non-Heavy users supply GROK_API_KEY / XAI_API_KEY via secrets.env
+// (which the spawn inherits from process.env automatically — no need to
+// re-list it in spawn.env).
+'use strict';
+const { chooseModel } = require('./grok-models');
+// ──────────────────────────────────────────────────────────────────────────
+// Patterns — observed from grok-dev@1.1.5 source (dist/ui/app.js) plus
+// Joshua's smoke test on 2026-05-01. TUI is OpenTUI/React-rendered with
+// frequent redraws; patterns must survive ANSI strip and partial chunks.
+// Conservative bias: false negatives (missed status updates) are cheaper
+// than false positives (badge flapping or spurious 'errored' status).
+// ──────────────────────────────────────────────────────────────────────────
+// Prompt indicator — the TUI's empty-state placeholder. Weak signal but the
+// only stable string that appears reliably in TUI output. Sprint 46 T4 may
+// refine if a more precise marker is observed.
+const PROMPT = /Message Grok[….]/;
+// Thinking — Grok's three known "isProcessing" shimmer states. Hits any of
+// the literal labels. The trailing variants on "Generating" / "Answering"
+// cover both ASCII `...` and Unicode ellipsis.
+const THINKING = /Planning next moves|Generating plan[….]|Answering[….]/;
+// Tool — TUI inline-tool prefix `→ ` (in box layout) OR headless `▸ `
+// (yellow ANSI in dist/headless/output.js:23). Anchored on the leading
+// glyph + space to avoid mid-line `→` in prose markdown firing as a tool.
+// Also catches the activity strings emitted by long-running tools.
+const TOOL = /(?:^|\n)\s*[→▸]\s|Running command[….]|Starting process[….]/;
+// Editing — Grok's TUI prefixes file-mutation tool calls with `Edit` /
+// `Write` / `Read` / `Run` labels rendered through InlineTool. Match these
+// after the tool glyph; the toolLabel function uses these verbatim.
+const EDITING = /(?:^|\n)\s*[→▸]\s+(Edit|Write|Read|Run|Create|Update|Delete)\b/;
+const EDITING_DETAIL = /(?:^|\n)\s*[→▸]\s+((?:Edit|Write|Read|Run|Create|Update|Delete)\b[^\n]*)/;
+// Idle — empty-state shows the placeholder and the cwd footer line. Use the
+// placeholder only — cwd shape varies by terminal width and home expansion.
+const IDLE = /Message Grok[….]\s*$/m;
+// Error — line-anchored variant matching Claude's strategy. Grok's tool
+// output (grep, test logs, lsp diagnostics) routinely carries "Error" /
+// "error" mid-line in a way that should NOT flip the panel to errored. Only
+// fire on line-leading failure phrases — same conservative shape as Claude
+// uses, plus the Grok-specific BtwOverlay error fallback "Something went
+// wrong." literal (rendered in t.diffRemovedFg).
+const ERROR = /(?:^|\n)\s*(?:(?:error|Error|ERROR|exception|Exception|Traceback|fatal|Fatal|FATAL|panic|EACCES|ECONNREFUSED|ENOENT|command not found|cannot find module|failed with exit code|Permission denied|Something went wrong)\b)/m;
+// ──────────────────────────────────────────────────────────────────────────
+// statusFor — replaces the absent grok branch in session.js _updateStatus.
+// Order matches Claude's: thinking → editing → tool → idle. First match
+// wins. Returns null on no-match so the caller leaves status untouched
+// (preserves the "no fallthrough" semantics _updateStatus relies on).
+// ──────────────────────────────────────────────────────────────────────────
// statusFor — classify one chunk of terminal output.
//
//   data — output text to test against the THINKING / EDITING / TOOL / IDLE
//          patterns; anything that is not a string yields null.
//
// Returns { status, statusDetail } for the first pattern that matches, in
// the order thinking → editing → tool → idle, or null when none match.
function statusFor(data) {
  if (typeof data !== 'string') return null;

  if (THINKING.test(data)) {
    return { status: 'thinking', statusDetail: 'Grok is reasoning...' };
  }

  if (EDITING.test(data)) {
    // Prefer the label plus the rest of its line, capped at 80 characters.
    const hit = EDITING_DETAIL.exec(data);
    const statusDetail = hit === null ? 'Editing files' : hit[1].slice(0, 80);
    return { status: 'editing', statusDetail };
  }

  if (TOOL.test(data)) {
    return { status: 'active', statusDetail: 'Using tools' };
  }

  return IDLE.test(data)
    ? { status: 'idle', statusDetail: 'Waiting for input' }
    : null;
}
+// ──────────────────────────────────────────────────────────────────────────
+// parseTranscript — Grok stores messages in SQLite (~/.grok/grok.db), not
+// in a JSONL file. The adapter contract is `(raw: string) => Memory[]`, so
+// the caller (the memory-session-end hook, refactored in Sprint 45 T4) is
+// responsible for extracting `messages.message_json` rows from grok.db and
+// passing them in as a JSON string envelope. Two accepted shapes:
+//
+//   1. JSON array of message objects (preferred):
+//        '[{"role":"user","content":"hi"},{"role":"assistant","content":[...]}]'
+//   2. JSONL — one message JSON per line (back-compat with hooks that
+//      replay grok.db rows verbatim):
+//        '{"role":"user","content":"hi"}\n{"role":"assistant","content":[...]}'
+//
+// Both fall through to the same per-message loop. message.content matches
+// the AI SDK provider shape: string OR array of { type: 'text', text } |
+// { type: 'tool-call', ... } | { type: 'tool-result', ... }. We extract the
+// text parts only — tool calls and results are surfaced via the `tool_calls`
+// and `tool_results` tables in grok.db, which the hook layer treats
+// separately if it wants tool-trace memories.
+// ──────────────────────────────────────────────────────────────────────────
// _extractText — flatten a message `content` value to plain text.
//
//   content — a string (returned unchanged) or an array of parts; only parts
//             shaped { type: 'text', text: <string> } contribute.
//
// Returns the text parts joined with single spaces, or '' when content is
// neither a string nor an array.
function _extractText(content) {
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';

  const pieces = [];
  for (const part of content) {
    const isTextPart = part && part.type === 'text' && typeof part.text === 'string';
    if (isTextPart) pieces.push(part.text);
  }
  return pieces.join(' ');
}
// parseTranscript — turn a serialized message list into memory entries.
//
//   raw — either a JSON array of message objects or JSONL (one message
//         object per line). Non-strings and the empty string yield [].
//
// Returns an array of { role, content } for messages whose role is 'user'
// or 'assistant' and whose extracted text is non-empty. `content` is capped
// at 400 UTF-16 code units; if the cut would land between the two halves of
// a surrogate pair, the dangling high surrogate is dropped so the excerpt
// never ends in a broken character.
function parseTranscript(raw) {
  if (typeof raw !== 'string' || raw.length === 0) return [];
  const MAX_CONTENT_CHARS = 400;

  // Try JSON-array first — the preferred envelope.
  let messages = null;
  try {
    const parsed = JSON.parse(raw);
    if (Array.isArray(parsed)) messages = parsed;
  } catch (_) { /* not a single JSON document — fall through to JSONL */ }

  // JSONL fallback — line-by-line parse; malformed lines are skipped on
  // purpose so one bad row doesn't discard the rest of the transcript.
  if (!messages) {
    messages = [];
    for (const line of raw.split('\n')) {
      const trimmed = line.trim();
      if (!trimmed) continue;
      try {
        const obj = JSON.parse(trimmed);
        if (obj && typeof obj === 'object') messages.push(obj);
      } catch (_) { continue; }
    }
  }

  const out = [];
  for (const msg of messages) {
    if (!msg || typeof msg !== 'object') continue;
    const role = msg.role;
    if (role !== 'user' && role !== 'assistant') continue;
    const text = _extractText(msg.content);
    if (!text) continue;

    let clipped = text.slice(0, MAX_CONTENT_CHARS);
    if (text.length > MAX_CONTENT_CHARS) {
      // A high surrogate (0xD800–0xDBFF) as the last kept unit means its
      // low-surrogate partner was cut off; keep the excerpt well-formed.
      const tail = clipped.charCodeAt(clipped.length - 1);
      if (tail >= 0xd800 && tail <= 0xdbff) clipped = clipped.slice(0, -1);
    }
    out.push({ role, content: clipped });
  }
  return out;
}
+// ──────────────────────────────────────────────────────────────────────────
+// bootPromptTemplate — Grok reads `AGENTS.md` (per docs/AGENT-RUNTIMES.md
+// § 4: convergent file with Codex via the sync-agent-instructions.js
+// generator). The boot block points the lane at AGENTS.md instead of
+// CLAUDE.md and uses the same `memory_recall + read instructional file +
+// read sprint docs` shape as Claude. Sprint 46 T2 will refine per-agent
+// boot prompts further; this is the contract-complete placeholder.
+// ──────────────────────────────────────────────────────────────────────────
// bootPromptTemplate — render the boot-sequence prompt for one lane.
//
//   lane   — optional { id, project, briefingPath, topic }
//   sprint — optional { number, name, project }
//
// Missing fields fall back to placeholders: id 'T?', number '?', name
// 'unnamed', project 'termdeck', and a briefing path derived from the
// sprint directory. The prompt tells the agent to read both
// ~/.claude/CLAUDE.md and ./AGENTS.md before the sprint docs.
//
// Returns the prompt as a single newline-joined string.
function bootPromptTemplate(lane = {}, sprint = {}) {
  // Default parameters only cover `undefined`; an explicit null from a
  // caller would otherwise throw on the first property read.
  const laneInfo = lane || {};
  const sprintInfo = sprint || {};

  const tn = laneInfo.id || 'T?';
  // Keep a literal sprint number 0 — a plain `||` fallback would render it as '?'.
  const sprintNum = sprintInfo.number === 0 ? 0 : sprintInfo.number || '?';
  const sprintName = sprintInfo.name || 'unnamed';
  const project = laneInfo.project || sprintInfo.project || 'termdeck';
  const briefing = laneInfo.briefingPath || `docs/sprint-${sprintNum}-${sprintName}/${tn}-<lane>.md`;
  const topic = laneInfo.topic || laneInfo.briefingPath || sprintName;
  return [
    `You are ${tn} in Sprint ${sprintNum} (${sprintName}). Boot sequence:`,
    `1. memory_recall(project="${project}", query="${topic}")`,
    `2. memory_recall(query="recent decisions and bugs")`,
    `3. Read ~/.claude/CLAUDE.md and ./AGENTS.md`,
    `4. Read docs/sprint-${sprintNum}-${sprintName}/PLANNING.md`,
    `5. Read docs/sprint-${sprintNum}-${sprintName}/STATUS.md`,
    `6. Read ${briefing}`,
    '',
    'Then begin. Stay in your lane. Post FINDING / FIX-PROPOSED / DONE in STATUS.md.',
    "Don't bump versions, don't touch CHANGELOG, don't commit.",
  ].join('\n');
}
+// ──────────────────────────────────────────────────────────────────────────
+// Adapter export. spawn.env.GROK_MODEL defaults to the cheap-fast tier;
+// per-lane override is the launcher's job at session-spawn time (Sprint 46
+// reads `agent: grok` + optional `model-hint: code|reasoning-deep|...` from
+// the lane brief frontmatter and overlays). GROK_API_KEY isn't repeated in
+// spawn.env because the PTY inherits it from the TermDeck server's process
+// env; the secrets.env load at server boot is the canonical path.
+// ──────────────────────────────────────────────────────────────────────────
// grokAdapter — the adapter record for the `grok` CLI. Bundles the command
// matcher, spawn settings, the status/error regexes declared in this file,
// and the statusFor / parseTranscript / bootPromptTemplate functions.
const grokAdapter = {
  name: 'grok',
  sessionType: 'grok',
  // True when `cmd` is a string containing `grok` as a standalone word at
  // the start, after whitespace, or after a path separator (case-insensitive).
  matches(cmd) {
    if (typeof cmd !== 'string') return false;
    return /(?:^|\s|\/)grok(?:\b|$)/i.test(cmd);
  },
  spawn: {
    binary: 'grok',
    defaultArgs: [],
    // chooseModel() with no hint — evaluated once, when this object is built.
    env: { GROK_MODEL: chooseModel() },
  },
  patterns: {
    prompt: PROMPT,
    thinking: THINKING,
    editing: EDITING,
    tool: TOOL,
    idle: IDLE,
    error: ERROR,
  },
  patternNames: { error: 'grok-error', tool: 'grok-tool' },
  statusFor,
  parseTranscript,
  bootPromptTemplate,
  costBand: 'subscription',
};
+module.exports = grokAdapter;

package/packages/server/src/agent-adapters/index.js CHANGED Viewed

@@ -13,12 +13,18 @@
 // and Sprint 45 T4 wires the launcher UI through the same registry.
 const claude = require('./claude');
+const codex = require('./codex');
+const gemini = require('./gemini');
+const grok = require('./grok');
 // Keyed by adapter name (NOT session.meta.type — adapters expose their own
 // `sessionType` field for that mapping). Order is iteration order for the
 // detect loop in session.js, so list more-specific adapters before less.
 const AGENT_ADAPTERS = {
   claude,
+  codex,
+  gemini,
+  grok,
 };
 // Convenience accessor — returns the adapter whose `sessionType` matches the

package/packages/server/src/index.js CHANGED Viewed

@@ -8,7 +8,9 @@ const { WebSocketServer } = require('ws');
 const path = require('path');
 const os = require('os');
 const fs = require('fs');
+const dns = require('dns');
 const { v4: uuidv4 } = require('uuid');
+const { createCachedLookup, createFailureLogger } = require('./rumen-pool-resilience');
 // Conditional imports (graceful fallback if not installed yet)
 let pty, Database, pg;
@@ -19,10 +21,18 @@ try { pg = require('pg'); } catch { pg = null; }
 // Module-level singleton Postgres pool for rumen_insights (petvetbid DB).
 // Lazy-initialized on first rumen endpoint hit so startup stays fast and
 // servers without DATABASE_URL never pay the connection cost.
+//
+// DNS-resilience (Sprint 45 side-task): the pool is constructed with a
+// cached `lookup` function that retries DNS failures with jittered
+// exponential backoff and serves stale entries during transient outages.
+// Pool errors / recoveries flow through a recency-graded logger so a
+// flapping host doesn't flood the log.
 let _rumenPool = null;
 let _rumenPoolFailed = false;
 let _rumenPoolFailedAt = 0;
 const RUMEN_POOL_RETRY_MS = 30_000;
+const _rumenLookup = createCachedLookup(dns);
+const _rumenLogger = createFailureLogger(console);
 const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
 function getRumenPool() {
   if (_rumenPool) return _rumenPool;
@@ -38,14 +48,14 @@ function getRumenPool() {
       connectionString: process.env.DATABASE_URL,
       max: 4,
       idleTimeoutMillis: 30000,
-      connectionTimeoutMillis: 5000
-    });
-    _rumenPool.on('error', (err) => {
-      console.warn('[rumen] pg pool error:', err.message);
+      connectionTimeoutMillis: 5000,
+      lookup: _rumenLookup,
     });
+    _rumenPool.on('error', (err) => _rumenLogger.logFailure(`pg pool error: ${err.message}`));
+    _rumenPool.on('connect', () => _rumenLogger.logRecovery());
     return _rumenPool;
   } catch (err) {
-    console.warn('[rumen] failed to create pg pool:', err.message);
+    _rumenLogger.logFailure(`failed to create pg pool: ${err.message}`);
     _rumenPoolFailed = true;
     _rumenPoolFailedAt = Date.now();
     return null;
@@ -69,6 +79,7 @@ const { createGraphRoutes } = require('./graph-routes');
 const { createProjectsRoutes } = require('./projects-routes');
 const orchestrationPreview = require('./orchestration-preview');
 const { createPtyReaper } = require('./pty-reaper');
+const { AGENT_ADAPTERS } = require('./agent-adapters');
 // Sprint 37 T3 — lazy resolution of T2's CLI modules. The orchestration-preview
 // helper is decoupled from T2's templates.js / init-project.js; we resolve
@@ -1244,6 +1255,28 @@ function createServer(config) {
     res.json(t);
   });
+  // GET /api/agent-adapters - serializable projection of the multi-agent
+  // registry for the launcher. Sprint 45 T4: replaces the hardcoded
+  // claude/cc/gemini/python branches in app.js with a registry-driven
+  // detector. Each entry exposes only the fields the client needs:
+  //   • name        — adapter id ("claude", "codex", "gemini", "grok")
+  //   • sessionType — meta.type the launcher should set
+  //   • binary      — canonical command name; client matches `^binary\b` (i)
+  //   • costBand    — 'free' | 'pay-per-token' | 'subscription' (Sprint 46
+  //                   surfaces this in PLANNING.md cost annotations)
+  // Functions / RegExps are NOT serialized — match logic lives client-side
+  // and uses the binary as the prefix anchor. Adapter-specific shorthand
+  // (e.g. `cc` → `claude`) is normalized in app.js before this lookup.
+  app.get('/api/agent-adapters', (req, res) => {
+    const list = Object.values(AGENT_ADAPTERS).map((a) => ({
+      name: a.name,
+      sessionType: a.sessionType,
+      binary: a.spawn && a.spawn.binary,
+      costBand: a.costBand,
+    }));
+    res.json(list);
+  });
   // Public-shape helper so GET and PATCH return the same envelope.
   function publicConfigPayload() {
     return {
@@ -1650,10 +1683,16 @@ function createServer(config) {
     if (!pool) return res.json({ enabled: false });
     try {
+      // Sprint 45 side-task 2 — order by COALESCE(started_at, completed_at) so
+      // jobs whose upstream writer (the @jhizzard/rumen createJob INSERT in the
+      // Edge Function) leaves started_at NULL still surface as "latest" via
+      // their populated completed_at. Pre-fix the query returned a 2026-04-16
+      // job permanently because that was the last row to have started_at
+      // populated — every subsequent insert lands started_at = NULL.
       const jobSql =
         `SELECT id, status, completed_at, sessions_processed, insights_generated
            FROM rumen_jobs
-           ORDER BY started_at DESC
+           ORDER BY COALESCE(started_at, completed_at) DESC NULLS LAST
            LIMIT 1`;
       const insightSql =
         `SELECT

package/packages/server/src/rumen-pool-resilience.js ADDED Viewed

@@ -0,0 +1,111 @@
+// Sprint 45 side-task — DNS-resilience policy for the rumen pg.Pool.
+//
+// Two factories, both DI-friendly so tests can stub dns + console:
+//
+//   createCachedLookup(dnsModule, opts)
+//     Returns a (hostname, options, callback) function suitable for
+//     pg.Pool's `lookup` config. Caches successful lookups for
+//     `cacheTtlMs` (default 30s). On lookup failure, retries with
+//     jittered exponential backoff up to `backoffCapsMs.length`
+//     attempts (default [100, 500, 2000, 5000]). If every retry fails
+//     and a stale cached address exists, serves stale rather than
+//     failing — DNS flickers shouldn't tear the pool down.
+//
+//   createFailureLogger(consoleModule, opts)
+//     Returns { logFailure, logRecovery } closures owning a private
+//     failure-window state. First failure logs `warn`; consecutive
+//     failures within `windowMs` (default 60s) downgrade to `debug`;
+//     a recovery after any prior failure logs `info` once and clears
+//     the window. Idempotent recovery (no failures pending) is silent.
+//
+// Both factories are pure — no module-scope state, no side effects on
+// require — so tests can construct fresh instances per case.
+'use strict';
+const DEFAULT_BACKOFF_CAPS_MS = [100, 500, 2000, 5000];
+const DEFAULT_DNS_CACHE_TTL_MS = 30_000;
+const DEFAULT_FAILURE_WINDOW_MS = 60_000;
// _jitter — pick a whole-millisecond delay in the upper half of a cap.
//
//   capMs — upper bound for the delay
//   rng   — function returning a number; with values in [0, 1) the result
//           lies between 50% and just under 100% of capMs
//
// Returns the scaled delay rounded down to an integer.
function _jitter(capMs, rng) {
  const fraction = 0.5 + rng() * 0.5;
  return Math.floor(capMs * fraction);
}
// createCachedLookup — build a dns.lookup-shaped function with a TTL cache,
// jittered retries, and a stale-answer fallback.
//
//   dnsModule          — object exposing lookup(hostname, options, callback)
//   opts.cacheTtlMs    — lifetime of a successful answer
//                        (default DEFAULT_DNS_CACHE_TTL_MS)
//   opts.backoffCapsMs — one delay cap per retry; its length is the retry
//                        budget (default DEFAULT_BACKOFF_CAPS_MS)
//   opts.setTimeout / opts.now / opts.random — injectable timer, clock, RNG
//
// Returns cachedLookup(hostname, [options], callback). A fresh cache entry
// is answered without touching dnsModule. Otherwise the lookup is attempted
// and, on error, retried after a jittered delay until the caps run out; at
// that point an expired entry for the same request is served if one
// existed when the call started, else the last error is passed through.
//
// Cache entries are keyed by hostname AND the `family` / `all` options.
// Those options change the family or the shape (single address vs. array)
// of the answer, so a hostname-only key could replay an answer that does
// not fit the request. Other lookup options are not part of the key.
function createCachedLookup(dnsModule, opts = {}) {
  const cacheTtlMs = opts.cacheTtlMs ?? DEFAULT_DNS_CACHE_TTL_MS;
  const backoffCapsMs = opts.backoffCapsMs ?? DEFAULT_BACKOFF_CAPS_MS;
  const setTimeoutFn = opts.setTimeout ?? setTimeout;
  const now = opts.now ?? Date.now;
  const rng = opts.random ?? Math.random;
  const cache = new Map();

  // dns.lookup accepts either an options object or a bare family number.
  const cacheKeyFor = (hostname, options) => {
    const isObject = options !== null && typeof options === 'object';
    let family = 0;
    if (typeof options === 'number') family = options;
    else if (isObject && options.family != null) family = options.family;
    const all = isObject && options.all === true;
    return JSON.stringify([hostname, String(family), all]);
  };

  return function cachedLookup(hostname, options, callback) {
    if (typeof options === 'function') { callback = options; options = {}; }
    const key = cacheKeyFor(hostname, options);
    const t = now();
    const hit = cache.get(key);
    if (hit && hit.expiresAt > t) {
      return callback(null, hit.address, hit.family);
    }
    let attempt = 0;
    const tryOnce = () => {
      dnsModule.lookup(hostname, options, (err, address, family) => {
        if (!err) {
          cache.set(key, { address, family, expiresAt: now() + cacheTtlMs });
          return callback(null, address, family);
        }
        if (attempt >= backoffCapsMs.length) {
          // Retry budget spent: prefer the expired answer over an error.
          if (hit) return callback(null, hit.address, hit.family);
          return callback(err);
        }
        const delay = _jitter(backoffCapsMs[attempt++], rng);
        setTimeoutFn(tryOnce, delay);
      });
    };
    tryOnce();
  };
}
// createFailureLogger — build a logger that quiets repeated failures.
//
//   consoleModule — object with warn() and, optionally, debug() / info();
//                   log() is used where debug / info are missing
//   opts.windowMs — a failure closer than this to the previous one is
//                   logged at debug level (default DEFAULT_FAILURE_WINDOW_MS)
//   opts.prefix   — text put in front of every line (default '[rumen]')
//   opts.now      — injectable clock (default Date.now)
//
// Returns { logFailure(message), logRecovery(message?), _state() }.
//   logFailure  — warns for the first failure of a streak, debug-logs
//                 "(debounced N)" for follow-ups inside the window.
//   logRecovery — logs one info line if any failure is pending, then
//                 resets; silent when nothing is pending.
//   _state      — snapshot { firstAt, lastAt, count } for tests.
//
// "Failure pending" is tracked by `count`, not by `firstAt`: timestamps
// come from the injectable clock, and a clock that legitimately returns 0
// would make a timestamp-based sentinel indistinguishable from "no failure".
// Logger methods are invoked with `consoleModule` as `this` so loggers
// whose methods depend on their receiver keep working.
function createFailureLogger(consoleModule, opts = {}) {
  const windowMs = opts.windowMs ?? DEFAULT_FAILURE_WINDOW_MS;
  const prefix = opts.prefix ?? '[rumen]';
  const now = opts.now ?? Date.now;
  let firstAt = 0;
  let lastAt = 0;
  let count = 0;

  function logFailure(message) {
    const t = now();
    if (count > 0 && (t - lastAt) < windowMs) {
      count += 1;
      lastAt = t;
      const debug = consoleModule.debug || consoleModule.log;
      debug.call(consoleModule, `${prefix} (debounced ${count}) ${message}`);
      return;
    }
    // First failure, or the previous streak has aged out of the window.
    firstAt = t;
    lastAt = t;
    count = 1;
    consoleModule.warn(`${prefix} ${message}`);
  }

  function logRecovery(message) {
    if (count === 0) return;
    const info = consoleModule.info || consoleModule.log;
    info.call(consoleModule, `${prefix} recovered after ${count} failure(s)${message ? ` — ${message}` : ''}`);
    firstAt = 0;
    lastAt = 0;
    count = 0;
  }

  function _state() { return { firstAt, lastAt, count }; }

  return { logFailure, logRecovery, _state };
}
+module.exports = {
+  createCachedLookup,
+  createFailureLogger,
+  DEFAULT_BACKOFF_CAPS_MS,
+  DEFAULT_DNS_CACHE_TTL_MS,
+  DEFAULT_FAILURE_WINDOW_MS,
+};

package/packages/server/src/session.js CHANGED Viewed

@@ -15,7 +15,7 @@ const os = require('os');
 const path = require('path');
 const { resolveTheme } = require('./theme-resolver');
 const flashbackDiag = require('./flashback-diag');
-const claudeAdapter = require('./agent-adapters/claude');
+const geminiAdapter = require('./agent-adapters/gemini');
 const { detectAdapter, getAdapterForSessionType } = require('./agent-adapters');
 // Strip ANSI escape codes for pattern matching
@@ -29,24 +29,31 @@ function stripAnsi(str) {
 // Pattern matchers for detecting terminal type and status.
 //
-// Sprint 44 T3: claudeCode patterns are owned by the Claude adapter at
-// ./agent-adapters/claude.js. This object continues to expose them under
-// the legacy `PATTERNS.claudeCode.*` shape so external callers
-// (tests/rcfile-noise.test.js, tests/analyzer-error-fixtures.test.js, the
-// rcfile-noise analyze.js fixture script) keep working without import
-// changes. Sprint 45 T4 removes this shim — new code should consume the
-// adapter directly via require('./agent-adapters/claude').
+// Sprint 45 T4 removed the Sprint 44 T3 Claude shim (`PATTERNS.claudeCode.*`
+// and `PATTERNS.errorLineStart`). Claude-specific regexes now live exclusively
+// at ./agent-adapters/claude.js — read via `claudeAdapter.patterns.*`. The
+// `_detectErrors` and `_updateStatus` paths route through `getAdapterForSessionType`
+// for any registered adapter.
+//
+// Sprint 45 T2 retains `PATTERNS.geminiCli` as a shim into the Gemini adapter
+// for the one-release deprecation horizon — same pattern Sprint 44 T3 used.
+// What stays in this file:
+//   • geminiCli       — Sprint 45 T2 shim into ./agent-adapters/gemini.js
+//   • pythonServer    — server SUBTYPE detection (no adapter; status-badge only)
+//   • shell           — default fallback (no adapter)
+//   • error           — cross-agent prose-shape primary error fallback (used
+//                       by `_detectErrors` when the active adapter has no
+//                       `patterns.error`, and exported for tests)
+//   • shellError      — cross-agent Unix shell-error shapes (always tried as
+//                       the secondary fallback in `_detectErrors`)
 const PATTERNS = {
-  claudeCode: {
-    prompt: claudeAdapter.patterns.prompt,
-    thinking: claudeAdapter.patterns.thinking,
-    editing: claudeAdapter.patterns.editing,
-    tool: claudeAdapter.patterns.tool,
-    idle: claudeAdapter.patterns.idle
-  },
+  // Sprint 45 T2: geminiCli patterns are owned by the Gemini adapter at
+  // ./agent-adapters/gemini.js. Shim preserves the legacy
+  // `PATTERNS.geminiCli.{prompt,thinking}` shape — same regex objects, so
+  // any external reference equality holds.
   geminiCli: {
-    prompt: /^gemini>\s/m,
-    thinking: /\b(Generating|Working)\b/,
+    prompt: geminiAdapter.patterns.prompt,
+    thinking: geminiAdapter.patterns.thinking,
   },
   pythonServer: {
     uvicorn: /Uvicorn running on/,
@@ -90,17 +97,12 @@ const PATTERNS = {
   // child-process error reporting fire without depending on the line ALSO
   // containing the `No such file or directory` prose phrase.
   error: /(?:^|\n)\s*(?:Error:\s+\S|error:\s+\S|ERROR:\s+\S|Traceback \(most recent call last\):|npm ERR!|error\[E\d+\]:|Uncaught Exception|Fatal:|ENOENT:\s+\S|EACCES:\s+\S|ECONNREFUSED:\s+\S)/m,
-  // Stricter line-anchored variant for Claude Code, whose tool output (grep
-  // results, test logs, file contents) routinely mentions "Error" mid-line
-  // without representing an actual failure of the agent itself.
-  // Sprint 40 T2: added mixed-case `Fatal` (mirrors `fatal` / `FATAL`) and
-  // the `npm ERR!` shape (special-cased outside the alternation because
-  // `!` is not a word character so `\b` after `npm ERR!` doesn't match).
-  // Sprint 44 T3: this regex is now owned by the Claude adapter
-  // (./agent-adapters/claude.js patterns.error). The shim below preserves
-  // the legacy PATTERNS.errorLineStart export — same regex object, so any
-  // existing reference equality (e.g. `=== PATTERNS.errorLineStart`) holds.
-  errorLineStart: claudeAdapter.patterns.error,
+  // Sprint 45 T4: the Claude-specific line-anchored variant
+  // (formerly `PATTERNS.errorLineStart`) is owned by the Claude adapter at
+  // ./agent-adapters/claude.js — read via `claudeAdapter.patterns.error`.
+  // _detectErrors below routes through `getAdapterForSessionType` for
+  // claude-code sessions and falls through to PATTERNS.error / shellError
+  // for non-adapter sessions.
   // Sprint 33: PATTERNS.error misses the most common Unix shell errors —
   // `cat: /foo: No such file or directory`, `bash: foo: command not found`,
   // `rm: cannot remove ...: Permission denied`. These have a colon-prefix
@@ -255,20 +257,17 @@ class Session {
   }
   _detectType(data) {
-    // Sprint 44 T3: registry-aware detection. detectAdapter() iterates
-    // AGENT_ADAPTERS in declaration order and returns the first hit by
-    // prompt regex OR command-string match. Sprint 44 lands Claude only
-    // (so this returns the Claude adapter or undefined); Sprint 45 adds
-    // Codex / Gemini / Grok adapters and the gemini fall-through below
-    // moves into gemini.js.
+    // Sprint 44 T3 + Sprint 45: registry-aware detection. detectAdapter()
+    // iterates AGENT_ADAPTERS in declaration order and returns the first
+    // hit by prompt regex OR command-string match. Claude / Codex /
+    // Gemini / Grok all live in the registry now; only python-server
+    // (a non-CLI-agent type) stays here as in-file fall-through.
     const adapter = detectAdapter(data, this.meta.command);
     if (adapter) {
       this.meta.type = adapter.sessionType;
       return;
     }
-    if (PATTERNS.geminiCli.prompt.test(data) || /gemini/i.test(this.meta.command)) {
-      this.meta.type = 'gemini';
-    } else if (
+    if (
       PATTERNS.pythonServer.uvicorn.test(data) ||
       PATTERNS.pythonServer.flask.test(data) ||
       PATTERNS.pythonServer.django.test(data) ||
@@ -282,12 +281,11 @@ class Session {
     const p = PATTERNS;
     const oldStatus = this.meta.status;
-    // Sprint 44 T3: claude-code status detection now lives in the Claude
+    // Sprint 44 T3 + Sprint 45: per-agent status detection lives in each
     // adapter's `statusFor(data)` method. Returns { status, statusDetail }
     // on a match, null on no-change — preserves the original switch's
-    // "leave status untouched if no claude pattern fires" semantics.
-    // Other types (gemini, python-server, default shell) stay in-file
-    // until Sprint 45 migrates them.
+    // "leave status untouched if no pattern fires" semantics. Only
+    // non-CLI-agent types (python-server + default shell) stay in-file.
     const adapter = getAdapterForSessionType(this.meta.type);
     if (adapter && typeof adapter.statusFor === 'function') {
       const result = adapter.statusFor(data);
@@ -297,16 +295,6 @@ class Session {
       }
     } else {
       switch (this.meta.type) {
-      case 'gemini':
-        if (p.geminiCli.thinking.test(data)) {
-          this.meta.status = 'thinking';
-          this.meta.statusDetail = 'Gemini is generating...';
-        } else if (p.geminiCli.prompt.test(data)) {
-          this.meta.status = 'idle';
-          this.meta.statusDetail = 'Waiting for input';
-        }
-        break;
       case 'python-server':
         if (p.pythonServer.request.test(data)) {
           this.meta.status = 'active';
@@ -409,12 +397,12 @@ class Session {
     // (grep matches, test results, log dumps). Use a line-anchored pattern
     // for that session type so we don't flag content as failure.
     //
-    // Sprint 44 T3: per-agent primary error pattern is now read off the
-    // adapter (`patterns.error` + `patternNames.error`). Falls back to the
-    // generic prose-shape PATTERNS.error when no adapter has claimed the
-    // session type. The Claude adapter's `patterns.error` IS the same regex
-    // object as PATTERNS.errorLineStart (the shim wires them together), so
-    // existing `=== PATTERNS.errorLineStart` reference checks still hold.
+    // Sprint 44 T3 / Sprint 45 T4: per-agent primary error pattern is read
+    // off the adapter (`patterns.error` + `patternNames.error`). Falls back
+    // to the generic prose-shape PATTERNS.error when no adapter has claimed
+    // the session type. (Sprint 44 retained a `PATTERNS.errorLineStart` shim
+    // that pointed at the Claude adapter's regex; Sprint 45 T4 removed the
+    // shim — read `claudeAdapter.patterns.error` directly when needed.)
     const adapter = getAdapterForSessionType(this.meta.type);
     const primaryPattern = adapter && adapter.patterns && adapter.patterns.error
       ? adapter.patterns.error