@jhizzard/termdeck 0.12.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -2
- package/packages/client/public/app.js +60 -16
- package/packages/server/src/agent-adapters/claude.js +158 -0
- package/packages/server/src/agent-adapters/codex.js +199 -0
- package/packages/server/src/agent-adapters/gemini.js +158 -0
- package/packages/server/src/agent-adapters/grok-models.js +115 -0
- package/packages/server/src/agent-adapters/grok.js +253 -0
- package/packages/server/src/agent-adapters/index.js +61 -0
- package/packages/server/src/index.js +45 -6
- package/packages/server/src/rumen-pool-resilience.js +111 -0
- package/packages/server/src/session.js +72 -53
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
// Grok model selection — Sprint 45 T3
|
|
2
|
+
//
|
|
3
|
+
// `grok-dev` (the superagent-ai CLI) ships an 11-model lineup spanning
|
|
4
|
+
// $0.2/$0.5 cheap-fast tiers up to $3/$15 flagship. The wrong default
|
|
5
|
+
// silently 10x's a bill on routine tasks: a "look at this file and tell me
|
|
6
|
+
// what's wrong" lane on `grok-4.20-0309-reasoning` (Heavy, $2/$6) costs the
|
|
7
|
+
// same as ten lanes on `grok-4-1-fast-non-reasoning`. The orchestrator picks
|
|
8
|
+
// per-lane via `chooseModel(taskHint)` at boot-prompt construction time
|
|
9
|
+
// (see SPRINT-45-PREP-NOTES.md § "Concern 2: Model selection heuristic").
|
|
10
|
+
// The adapter's `spawn.env.GROK_MODEL` defaults to the cheap-fast model and
|
|
11
|
+
// is overridden per-lane by the launcher.
|
|
12
|
+
//
|
|
13
|
+
// Tier table (price = USD per 1M tokens, in/out):
|
|
14
|
+
//
|
|
15
|
+
// tier | model id | price | use case
|
|
16
|
+
// ───────────────────┼───────────────────────────────────┼──────────┼──────────────────────
|
|
17
|
+
// fast-non-reasoning | grok-4-1-fast-non-reasoning | $0.2/0.5 | DEFAULT — routine
|
|
18
|
+
// fast-reasoning | grok-4-1-fast-reasoning | $0.2/0.5 | light CoT under budget
|
|
19
|
+
// code | grok-code-fast-1 | $0.2/1.5 | code gen / refactor
|
|
20
|
+
// reasoning-deep | grok-4.20-0309-reasoning | $2/6 | hard problems, audit
|
|
21
|
+
// reasoning-non-cot | grok-4.20-0309-non-reasoning | $2/6 | high-quality non-CoT
|
|
22
|
+
// multi-agent | grok-4.20-multi-agent-0309 | $2/6 | parallel sub-agent fan-out
|
|
23
|
+
// flagship | grok-4-0709 | $3/15 | when Heavy isn't enough
|
|
24
|
+
// budget-compact | grok-3-mini | $0.3/0.5 | rare — usually wrong
|
|
25
|
+
//
|
|
26
|
+
// `grok-4-fast-non-reasoning`, `grok-4-fast-reasoning`, and `grok-3` are
|
|
27
|
+
// legacy aliases retained for completeness but not in the heuristic switch.
|
|
28
|
+
|
|
29
|
+
'use strict';
|
|
30
|
+
|
|
31
|
+
// Canonical model ids. Use the symbolic key in code; the heuristic resolves
|
|
32
|
+
// to the live id below. Keep these as data, not constants — Sprint 46+ may
|
|
33
|
+
// gain a `taskHint -> model` override file in `~/.termdeck/`.
|
|
34
|
+
const MODELS = {
  'fast-non-reasoning': 'grok-4-1-fast-non-reasoning',
  'fast-reasoning': 'grok-4-1-fast-reasoning',
  'code': 'grok-code-fast-1',
  'reasoning-deep': 'grok-4.20-0309-reasoning',
  'reasoning-non-cot': 'grok-4.20-0309-non-reasoning',
  'multi-agent': 'grok-4.20-multi-agent-0309',
  'flagship': 'grok-4-0709',
  'budget-compact': 'grok-3-mini',
};

// Legacy aliases — accepted as input to chooseModel for back-compat with
// Joshua's earlier `grok models` outputs. Each alias resolves to the live id
// of a tier in MODELS:
const LEGACY_ALIASES = {
  'grok-4-fast-non-reasoning': MODELS['fast-non-reasoning'],
  'grok-4-fast-reasoning': MODELS['fast-reasoning'],
  'grok-beta': MODELS['reasoning-deep'],
  'grok-4.20-multi-agent': MODELS['multi-agent'],
  'grok-3': MODELS['flagship'],
};

// chooseModel — orchestrator-side heuristic. Pass `taskHint` from the lane
// brief (Sprint 46 frontmatter `model-hint: code|reasoning-deep|...`) or omit
// for the cheap-fast default.
//
//   taskHint  tier key from MODELS, the `reasoning-quick` synonym for
//             `fast-reasoning`, a LEGACY_ALIASES key, or undefined/null/''.
//   returns   always a model id string. Unknown hints fall back to the
//             cheap-fast default rather than throwing — the bill consequence
//             of a typo silently routing to Heavy is worse than the latency
//             hit of cheap-fast on a hard task.
function chooseModel(taskHint) {
  switch (taskHint) {
    case 'code':
      return MODELS.code;
    case 'multi-agent':
      return MODELS['multi-agent'];
    case 'reasoning-deep':
      return MODELS['reasoning-deep'];
    case 'reasoning-quick':
    case 'fast-reasoning':
      return MODELS['fast-reasoning'];
    case 'reasoning-non-cot':
      return MODELS['reasoning-non-cot'];
    case 'flagship':
      return MODELS.flagship;
    case 'budget-compact':
      return MODELS['budget-compact'];
    case 'fast-non-reasoning':
    case undefined:
    case null:
    case '':
      return MODELS['fast-non-reasoning'];
    default: {
      // Accept legacy aliases verbatim; otherwise fall back to cheap-fast.
      // Only OWN keys count: a bare `LEGACY_ALIASES[taskHint]` would also hit
      // inherited members, so hints such as 'constructor' or '__proto__'
      // would return a function/object instead of a model id.
      const isAlias = Object.prototype.hasOwnProperty.call(LEGACY_ALIASES, taskHint);
      const aliased = isAlias ? LEGACY_ALIASES[taskHint] : undefined;
      return aliased || MODELS['fast-non-reasoning'];
    }
  }
}
|
|
88
|
+
|
|
89
|
+
// getModelInfo — for the launcher / dashboard cost annotations (Sprint 46).
|
|
90
|
+
// Returns the price band so the UI can render a $-tier indicator alongside
|
|
91
|
+
// the model name without each caller knowing the table.
|
|
92
|
+
function getModelInfo(modelId) {
  // Tier membership is decided by comparing against the live ids in MODELS,
  // so callers never need to know the price table themselves.
  const cheapIds = [
    MODELS['fast-non-reasoning'],
    MODELS['fast-reasoning'],
    MODELS.code,
  ];
  const heavyIds = [
    MODELS['reasoning-deep'],
    MODELS['reasoning-non-cot'],
    MODELS['multi-agent'],
  ];

  if (cheapIds.includes(modelId)) {
    // The code model shares the cheap input price but has a higher output price.
    const priceOut = modelId === MODELS.code ? 1.5 : 0.5;
    return { tier: 'cheap', priceIn: 0.2, priceOut };
  }
  if (heavyIds.includes(modelId)) {
    return { tier: 'heavy', priceIn: 2, priceOut: 6 };
  }
  if (modelId === MODELS.flagship) {
    return { tier: 'flagship', priceIn: 3, priceOut: 15 };
  }
  if (modelId === MODELS['budget-compact']) {
    return { tier: 'budget', priceIn: 0.3, priceOut: 0.5 };
  }
  // Anything not listed in MODELS (including legacy alias names) is unknown.
  return { tier: 'unknown', priceIn: null, priceOut: null };
}
|
|
109
|
+
|
|
110
|
+
module.exports = {
|
|
111
|
+
MODELS,
|
|
112
|
+
LEGACY_ALIASES,
|
|
113
|
+
chooseModel,
|
|
114
|
+
getModelInfo,
|
|
115
|
+
};
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
// Grok adapter (superagent-ai grok-dev CLI) — Sprint 45 T3
|
|
2
|
+
//
|
|
3
|
+
// Implements the 7-field adapter contract documented in ./claude.js and
|
|
4
|
+
// docs/AGENT-RUNTIMES.md § 5. TUI mode by default — conversation persists
|
|
5
|
+
// inside the PTY process for the lifetime of the panel, matching the Claude
|
|
6
|
+
// Code pattern. Headless `grok --prompt` is reserved for orchestrator
|
|
7
|
+
// background tasks (Sprint 46+) and is NOT this adapter's spawn shape.
|
|
8
|
+
//
|
|
9
|
+
// Lane-time empirical findings (Sprint 45 T3, 2026-05-01) — see
|
|
10
|
+
// docs/multi-agent-substrate/SPRINT-45-PREP-NOTES.md and Sprint 45 STATUS.md
|
|
11
|
+
// for the full investigation:
|
|
12
|
+
//
|
|
13
|
+
// • grok-dev v1.1.5, binary `/usr/local/bin/grok` (#!/usr/bin/env bun)
|
|
14
|
+
// • Session storage: SQLite at ~/.grok/grok.db, NOT JSON files in
|
|
15
|
+
// ~/.grok/sessions/. Tables (STRICT, requires SQLite ≥3.37):
|
|
16
|
+
// sessions(id, workspace_id, title, model, mode, status, created_at, ...)
|
|
17
|
+
// messages(session_id, seq, role, message_json, created_at)
|
|
18
|
+
// tool_calls, tool_results, usage_events, compactions
|
|
19
|
+
// `messages.message_json` is a JSON blob in AI SDK provider shape:
|
|
20
|
+
// { role: 'user'|'assistant'|'tool', content: string | Array<...> }
|
|
21
|
+
// where array parts are { type: 'text', text } | { type: 'tool-call', ... }
|
|
22
|
+
// | { type: 'tool-result', ... }. Sprint 45 T4 wires the memory hook to
|
|
23
|
+
// extract from grok.db and feed parseTranscript a JSON envelope.
|
|
24
|
+
//
|
|
25
|
+
// • TUI shimmer text strings (the canonical "thinking" indicator):
|
|
26
|
+
// "Planning next moves" — default isProcessing without stream content
|
|
27
|
+
// "Generating plan..." — plan-mode label
|
|
28
|
+
// "Answering…" — /btw overlay
|
|
29
|
+
// • Tool indicators: TUI renders `→ <label>` (InlineTool component);
|
|
30
|
+
// headless mode emits `▸ <label>`. Both forms accepted.
|
|
31
|
+
// • Sub-agents: 5 built-in (general / explore / vision / verify / computer)
|
|
32
|
+
// plus up to 12 user-defined customs on grok-4.20-multi-agent-0309
|
|
33
|
+
// (16-agent ceiling). Sub-agent fan-out is internal to grok-dev — the
|
|
34
|
+
// adapter doesn't need to surface per-sub-agent status; the parent CLI
|
|
35
|
+
// emits SubagentTaskLine entries that show through as inline tool calls.
|
|
36
|
+
// • Empty-state placeholder: "Message Grok…" — used only as a weak idle
|
|
37
|
+
// hint, not a load-bearing pattern.
|
|
38
|
+
//
|
|
39
|
+
// Cost band: 'subscription'. Joshua's SuperGrok Heavy carries the rate
|
|
40
|
+
// limits; non-Heavy users supply GROK_API_KEY / XAI_API_KEY via secrets.env
|
|
41
|
+
// (which the spawn inherits from process.env automatically — no need to
|
|
42
|
+
// re-list it in spawn.env).
|
|
43
|
+
|
|
44
|
+
'use strict';
|
|
45
|
+
|
|
46
|
+
const { chooseModel } = require('./grok-models');
|
|
47
|
+
|
|
48
|
+
// ──────────────────────────────────────────────────────────────────────────
|
|
49
|
+
// Patterns — observed from grok-dev@1.1.5 source (dist/ui/app.js) plus
|
|
50
|
+
// Joshua's smoke test on 2026-05-01. TUI is OpenTUI/React-rendered with
|
|
51
|
+
// frequent redraws; patterns must survive ANSI strip and partial chunks.
|
|
52
|
+
// Conservative bias: false negatives (missed status updates) are cheaper
|
|
53
|
+
// than false positives (badge flapping or spurious 'errored' status).
|
|
54
|
+
// ──────────────────────────────────────────────────────────────────────────
|
|
55
|
+
|
|
56
|
+
// Prompt indicator — the TUI's empty-state placeholder. Weak signal but the
|
|
57
|
+
// only stable string that appears reliably in TUI output. Sprint 46 T4 may
|
|
58
|
+
// refine if a more precise marker is observed.
|
|
59
|
+
const PROMPT = /Message Grok[….]/;

// Thinking — any of the three shimmer labels listed in the file header
// ("Planning next moves", "Generating plan", "Answering"). The character
// class after the last two accepts either a Unicode ellipsis or the first
// dot of an ASCII `...`.
const THINKING = /Planning next moves|Generating plan[….]|Answering[….]/;

// Tool — a line that starts (after optional whitespace) with the `→` or `▸`
// glyph followed by whitespace, or one of the long-running activity strings.
// The glyph must lead the line so that a mid-line `→` in prose is not read as
// a tool call.
const TOOL = /(?:^|\n)\s*[→▸]\s|Running command[….]|Starting process[….]/;

// Editing — a tool line whose label begins with one of the file/command
// verbs. EDITING only detects the verb; EDITING_DETAIL additionally captures
// the rest of that line (verb included) for use as a status detail.
const EDITING = /(?:^|\n)\s*[→▸]\s+(Edit|Write|Read|Run|Create|Update|Delete)\b/;
const EDITING_DETAIL = /(?:^|\n)\s*[→▸]\s+((?:Edit|Write|Read|Run|Create|Update|Delete)\b[^\n]*)/;

// Idle — the input placeholder at the end of a line (multiline mode, trailing
// whitespace allowed). The cwd footer is deliberately not matched because its
// shape varies with terminal width and home expansion.
const IDLE = /Message Grok[….]\s*$/m;

// Error — fires only when a failure keyword is the first token on a line.
// Tool output (grep, test logs, lsp diagnostics) routinely carries "error"
// mid-line and must not flip the panel to errored. Includes the Grok-specific
// "Something went wrong" fallback literal.
const ERROR = /(?:^|\n)\s*(?:(?:error|Error|ERROR|exception|Exception|Traceback|fatal|Fatal|FATAL|panic|EACCES|ECONNREFUSED|ENOENT|command not found|cannot find module|failed with exit code|Permission denied|Something went wrong)\b)/m;
|
|
89
|
+
|
|
90
|
+
// ──────────────────────────────────────────────────────────────────────────
|
|
91
|
+
// statusFor — replaces the absent grok branch in session.js _updateStatus.
|
|
92
|
+
// Order matches Claude's: thinking → editing → tool → idle. First match
|
|
93
|
+
// wins. Returns null on no-match so the caller leaves status untouched
|
|
94
|
+
// (preserves the "no fallthrough" semantics _updateStatus relies on).
|
|
95
|
+
// ──────────────────────────────────────────────────────────────────────────
|
|
96
|
+
|
|
97
|
+
function statusFor(data) {
  // Non-string chunks carry no status signal.
  if (typeof data !== 'string') return null;

  // Checks run in priority order; the first pattern that matches decides.
  if (THINKING.test(data)) return { status: 'thinking', statusDetail: 'Grok is reasoning...' };

  if (EDITING.test(data)) {
    // Re-scan with the capturing variant to surface the tool line itself,
    // capped at 80 characters for the status badge.
    const detail = EDITING_DETAIL.exec(data);
    const statusDetail = detail ? detail[1].slice(0, 80) : 'Editing files';
    return { status: 'editing', statusDetail };
  }

  if (TOOL.test(data)) return { status: 'active', statusDetail: 'Using tools' };

  if (IDLE.test(data)) return { status: 'idle', statusDetail: 'Waiting for input' };

  // No match: null tells the caller to leave the current status untouched.
  return null;
}
|
|
117
|
+
|
|
118
|
+
// ──────────────────────────────────────────────────────────────────────────
|
|
119
|
+
// parseTranscript — Grok stores messages in SQLite (~/.grok/grok.db), not
|
|
120
|
+
// in a JSONL file. The adapter contract is `(raw: string) => Memory[]`, so
|
|
121
|
+
// the caller (the memory-session-end hook, refactored in Sprint 45 T4) is
|
|
122
|
+
// responsible for extracting `messages.message_json` rows from grok.db and
|
|
123
|
+
// passing them in as a JSON string envelope. Two accepted shapes:
|
|
124
|
+
//
|
|
125
|
+
// 1. JSON array of message objects (preferred):
|
|
126
|
+
// '[{"role":"user","content":"hi"},{"role":"assistant","content":[...]}]'
|
|
127
|
+
// 2. JSONL — one message JSON per line (back-compat with hooks that
|
|
128
|
+
// replay grok.db rows verbatim):
|
|
129
|
+
// '{"role":"user","content":"hi"}\n{"role":"assistant","content":[...]}'
|
|
130
|
+
//
|
|
131
|
+
// Both fall through to the same per-message loop. message.content matches
|
|
132
|
+
// the AI SDK provider shape: string OR array of { type: 'text', text } |
|
|
133
|
+
// { type: 'tool-call', ... } | { type: 'tool-result', ... }. We extract the
|
|
134
|
+
// text parts only — tool calls and results are surfaced via the `tool_calls`
|
|
135
|
+
// and `tool_results` tables in grok.db, which the hook layer treats
|
|
136
|
+
// separately if it wants tool-trace memories.
|
|
137
|
+
// ──────────────────────────────────────────────────────────────────────────
|
|
138
|
+
|
|
139
|
+
// Flattens a message `content` value to plain text. Strings pass through
// unchanged; arrays contribute only their `{ type: 'text', text: string }`
// parts, joined with single spaces; anything else yields ''.
function _extractText(content) {
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';

  const pieces = [];
  for (const part of content) {
    // Skip null entries, non-text parts, and text parts without a string body.
    if (part && part.type === 'text' && typeof part.text === 'string') {
      pieces.push(part.text);
    }
  }
  return pieces.join(' ');
}
|
|
149
|
+
|
|
150
|
+
function parseTranscript(raw) {
  if (typeof raw !== 'string' || raw.length === 0) return [];

  // Preferred envelope: the whole payload is one JSON array of messages.
  let entries;
  try {
    const whole = JSON.parse(raw);
    entries = Array.isArray(whole) ? whole : null;
  } catch (_) {
    entries = null;
  }

  // Fallback envelope: JSONL. Blank lines, unparseable lines, and lines that
  // parse to a non-object are dropped rather than failing the transcript.
  if (entries === null) {
    entries = raw
      .split('\n')
      .map((line) => line.trim())
      .filter((line) => line.length > 0)
      .flatMap((line) => {
        try {
          const parsed = JSON.parse(line);
          return parsed && typeof parsed === 'object' ? [parsed] : [];
        } catch (_) {
          return [];
        }
      });
  }

  // Keep user/assistant turns that have text; cap each at 400 characters.
  const memories = [];
  for (const entry of entries) {
    if (!entry || typeof entry !== 'object') continue;
    const { role } = entry;
    if (!(role === 'user' || role === 'assistant')) continue;
    const text = _extractText(entry.content);
    if (text) memories.push({ role, content: text.slice(0, 400) });
  }
  return memories;
}
|
|
184
|
+
|
|
185
|
+
// ──────────────────────────────────────────────────────────────────────────
|
|
186
|
+
// bootPromptTemplate — Grok reads `AGENTS.md` (per docs/AGENT-RUNTIMES.md
|
|
187
|
+
// § 4: convergent file with Codex via the sync-agent-instructions.js
|
|
188
|
+
// generator). The boot block points the lane at AGENTS.md instead of
|
|
189
|
+
// CLAUDE.md and uses the same `memory_recall + read instructional file +
|
|
190
|
+
// read sprint docs` shape as Claude. Sprint 46 T2 will refine per-agent
|
|
191
|
+
// boot prompts further; this is the contract-complete placeholder.
|
|
192
|
+
// ──────────────────────────────────────────────────────────────────────────
|
|
193
|
+
|
|
194
|
+
// Builds the boot prompt text for a lane.
//
//   lane    optional object; reads `id`, `project`, `briefingPath`, `topic`.
//   sprint  optional object; reads `number`, `name`, `project`.
//   returns a newline-joined string: header, six numbered boot steps, a blank
//           line, then the two standing instructions.
//
// Every missing field falls back to a placeholder ('T?', '?', 'unnamed',
// 'termdeck'), so the function always returns a complete prompt.
function bootPromptTemplate(lane = {}, sprint = {}) {
  // Default parameters only apply to `undefined`. An explicit `null` (or any
  // non-object) would otherwise throw on the first property read, so treat it
  // the same as "nothing supplied".
  const laneInfo = lane !== null && typeof lane === 'object' ? lane : {};
  const sprintInfo = sprint !== null && typeof sprint === 'object' ? sprint : {};

  const tn = laneInfo.id || 'T?';
  const sprintNum = sprintInfo.number || '?';
  const sprintName = sprintInfo.name || 'unnamed';
  const project = laneInfo.project || sprintInfo.project || 'termdeck';
  const sprintDir = `docs/sprint-${sprintNum}-${sprintName}`;
  const briefing = laneInfo.briefingPath || `${sprintDir}/${tn}-<lane>.md`;
  // The recall query prefers an explicit topic, then the briefing path, then
  // the sprint name.
  const topic = laneInfo.topic || laneInfo.briefingPath || sprintName;

  return [
    `You are ${tn} in Sprint ${sprintNum} (${sprintName}). Boot sequence:`,
    `1. memory_recall(project="${project}", query="${topic}")`,
    `2. memory_recall(query="recent decisions and bugs")`,
    `3. Read ~/.claude/CLAUDE.md and ./AGENTS.md`,
    `4. Read ${sprintDir}/PLANNING.md`,
    `5. Read ${sprintDir}/STATUS.md`,
    `6. Read ${briefing}`,
    '',
    'Then begin. Stay in your lane. Post FINDING / FIX-PROPOSED / DONE in STATUS.md.',
    "Don't bump versions, don't touch CHANGELOG, don't commit.",
  ].join('\n');
}
|
|
214
|
+
|
|
215
|
+
// ──────────────────────────────────────────────────────────────────────────
|
|
216
|
+
// Adapter export. spawn.env.GROK_MODEL defaults to the cheap-fast tier;
|
|
217
|
+
// per-lane override is the launcher's job at session-spawn time (Sprint 46
|
|
218
|
+
// reads `agent: grok` + optional `model-hint: code|reasoning-deep|...` from
|
|
219
|
+
// the lane brief frontmatter and overlays). GROK_API_KEY isn't repeated in
|
|
220
|
+
// spawn.env because the PTY inherits it from the TermDeck server's process
|
|
221
|
+
// env; the secrets.env load at server boot is the canonical path.
|
|
222
|
+
// ──────────────────────────────────────────────────────────────────────────
|
|
223
|
+
|
|
224
|
+
const grokAdapter = {
  name: 'grok',
  sessionType: 'grok',
  // True when `cmd` is a string containing `grok` at the start, after
  // whitespace, or after a `/`, and ending on a word boundary
  // (case-insensitive).
  matches(cmd) {
    if (typeof cmd !== 'string') return false;
    return /(?:^|\s|\/)grok(?:\b|$)/i.test(cmd);
  },
  spawn: {
    binary: 'grok',
    defaultArgs: [],
    // No hint → chooseModel() yields the cheap-fast default model id.
    env: { GROK_MODEL: chooseModel() },
  },
  patterns: {
    prompt: PROMPT,
    thinking: THINKING,
    editing: EDITING,
    tool: TOOL,
    idle: IDLE,
    error: ERROR,
  },
  patternNames: {
    error: 'grok-error',
    tool: 'grok-tool',
  },
  statusFor,
  parseTranscript,
  bootPromptTemplate,
  costBand: 'subscription',
};
|
|
252
|
+
|
|
253
|
+
module.exports = grokAdapter;
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
// Agent adapter registry — Sprint 44 T3
|
|
2
|
+
//
|
|
3
|
+
// Single source of truth for per-agent terminal behavior. Each adapter
|
|
4
|
+
// implements the contract documented in ./claude.js (and the memorialization
|
|
5
|
+
// doc § 4) — type detection, status patterns, transcript parsing, boot prompt
|
|
6
|
+
// templating, cost band. session.js consults this registry when analyzing
|
|
7
|
+
// PTY output so adding a new agent (Codex / Gemini / Grok in Sprint 45) is a
|
|
8
|
+
// new file in this directory + one entry in `AGENT_ADAPTERS` below — no
|
|
9
|
+
// switch statements to extend.
|
|
10
|
+
//
|
|
11
|
+
// Sprint 44 lands the Claude adapter only. The other agents stay on the
|
|
12
|
+
// in-file shim path in session.js until Sprint 45 T1-T3 ship their adapters
|
|
13
|
+
// and Sprint 45 T4 wires the launcher UI through the same registry.
|
|
14
|
+
|
|
15
|
+
const claude = require('./claude');
|
|
16
|
+
const codex = require('./codex');
|
|
17
|
+
const gemini = require('./gemini');
|
|
18
|
+
const grok = require('./grok');
|
|
19
|
+
|
|
20
|
+
// Adapter registry, keyed by adapter name — NOT by session.meta.type; each
// adapter carries its own `sessionType` field for that mapping. Property
// order is the iteration order used by the lookup helpers below, so keep
// more-specific adapters ahead of less-specific ones.
const AGENT_ADAPTERS = { claude, codex, gemini, grok };
|
|
29
|
+
|
|
30
|
+
// Convenience accessor — returns the adapter whose `sessionType` matches the
|
|
31
|
+
// session.meta.type value, or undefined if no adapter has claimed that type.
|
|
32
|
+
// session.js calls this in the hot path; keep it cheap.
|
|
33
|
+
function getAdapterForSessionType(type) {
  // A falsy type can never be claimed by an adapter.
  if (!type) return undefined;
  // First registered adapter whose sessionType is strictly equal to `type`;
  // `find` yields undefined when none matches.
  return Object.values(AGENT_ADAPTERS).find((candidate) => candidate.sessionType === type);
}
|
|
40
|
+
|
|
41
|
+
// Convenience accessor — first adapter whose prompt regex matches `data` or
|
|
42
|
+
// whose `matches(command)` returns true. Returns undefined if no adapter
|
|
43
|
+
// claims the session, leaving the caller to fall back to legacy detection
|
|
44
|
+
// (gemini / python-server / shell). session.js calls this from `_detectType`.
|
|
45
|
+
function detectAdapter(data, command) {
  const dataIsText = typeof data === 'string';

  // An adapter claims the session if its prompt pattern matches the output
  // chunk OR its `matches` predicate accepts the command. Both checks are
  // evaluated for each adapter; registry order breaks ties.
  const claims = (candidate) => {
    const promptPattern = candidate.patterns && candidate.patterns.prompt;
    const promptHit = Boolean(promptPattern) && dataIsText && promptPattern.test(data);
    const cmdHit = typeof candidate.matches === 'function' && candidate.matches(command);
    return Boolean(promptHit || cmdHit);
  };

  // undefined when no adapter claims the session.
  return Object.values(AGENT_ADAPTERS).find(claims);
}
|
|
56
|
+
|
|
57
|
+
module.exports = {
|
|
58
|
+
AGENT_ADAPTERS,
|
|
59
|
+
getAdapterForSessionType,
|
|
60
|
+
detectAdapter,
|
|
61
|
+
};
|
|
@@ -8,7 +8,9 @@ const { WebSocketServer } = require('ws');
|
|
|
8
8
|
const path = require('path');
|
|
9
9
|
const os = require('os');
|
|
10
10
|
const fs = require('fs');
|
|
11
|
+
const dns = require('dns');
|
|
11
12
|
const { v4: uuidv4 } = require('uuid');
|
|
13
|
+
const { createCachedLookup, createFailureLogger } = require('./rumen-pool-resilience');
|
|
12
14
|
|
|
13
15
|
// Conditional imports (graceful fallback if not installed yet)
|
|
14
16
|
let pty, Database, pg;
|
|
@@ -19,10 +21,18 @@ try { pg = require('pg'); } catch { pg = null; }
|
|
|
19
21
|
// Module-level singleton Postgres pool for rumen_insights (petvetbid DB).
|
|
20
22
|
// Lazy-initialized on first rumen endpoint hit so startup stays fast and
|
|
21
23
|
// servers without DATABASE_URL never pay the connection cost.
|
|
24
|
+
//
|
|
25
|
+
// DNS-resilience (Sprint 45 side-task): the pool is constructed with a
|
|
26
|
+
// cached `lookup` function that retries DNS failures with jittered
|
|
27
|
+
// exponential backoff and serves stale entries during transient outages.
|
|
28
|
+
// Pool errors / recoveries flow through a recency-graded logger so a
|
|
29
|
+
// flapping host doesn't flood the log.
|
|
22
30
|
let _rumenPool = null;
|
|
23
31
|
let _rumenPoolFailed = false;
|
|
24
32
|
let _rumenPoolFailedAt = 0;
|
|
25
33
|
const RUMEN_POOL_RETRY_MS = 30_000;
|
|
34
|
+
const _rumenLookup = createCachedLookup(dns);
|
|
35
|
+
const _rumenLogger = createFailureLogger(console);
|
|
26
36
|
const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
27
37
|
function getRumenPool() {
|
|
28
38
|
if (_rumenPool) return _rumenPool;
|
|
@@ -38,14 +48,14 @@ function getRumenPool() {
|
|
|
38
48
|
connectionString: process.env.DATABASE_URL,
|
|
39
49
|
max: 4,
|
|
40
50
|
idleTimeoutMillis: 30000,
|
|
41
|
-
connectionTimeoutMillis: 5000
|
|
42
|
-
|
|
43
|
-
_rumenPool.on('error', (err) => {
|
|
44
|
-
console.warn('[rumen] pg pool error:', err.message);
|
|
51
|
+
connectionTimeoutMillis: 5000,
|
|
52
|
+
lookup: _rumenLookup,
|
|
45
53
|
});
|
|
54
|
+
_rumenPool.on('error', (err) => _rumenLogger.logFailure(`pg pool error: ${err.message}`));
|
|
55
|
+
_rumenPool.on('connect', () => _rumenLogger.logRecovery());
|
|
46
56
|
return _rumenPool;
|
|
47
57
|
} catch (err) {
|
|
48
|
-
|
|
58
|
+
_rumenLogger.logFailure(`failed to create pg pool: ${err.message}`);
|
|
49
59
|
_rumenPoolFailed = true;
|
|
50
60
|
_rumenPoolFailedAt = Date.now();
|
|
51
61
|
return null;
|
|
@@ -69,6 +79,7 @@ const { createGraphRoutes } = require('./graph-routes');
|
|
|
69
79
|
const { createProjectsRoutes } = require('./projects-routes');
|
|
70
80
|
const orchestrationPreview = require('./orchestration-preview');
|
|
71
81
|
const { createPtyReaper } = require('./pty-reaper');
|
|
82
|
+
const { AGENT_ADAPTERS } = require('./agent-adapters');
|
|
72
83
|
|
|
73
84
|
// Sprint 37 T3 — lazy resolution of T2's CLI modules. The orchestration-preview
|
|
74
85
|
// helper is decoupled from T2's templates.js / init-project.js; we resolve
|
|
@@ -1244,6 +1255,28 @@ function createServer(config) {
|
|
|
1244
1255
|
res.json(t);
|
|
1245
1256
|
});
|
|
1246
1257
|
|
|
1258
|
+
// GET /api/agent-adapters - serializable projection of the multi-agent
|
|
1259
|
+
// registry for the launcher. Sprint 45 T4: replaces the hardcoded
|
|
1260
|
+
// claude/cc/gemini/python branches in app.js with a registry-driven
|
|
1261
|
+
// detector. Each entry exposes only the fields the client needs:
|
|
1262
|
+
// • name — adapter id ("claude", "codex", "gemini", "grok")
|
|
1263
|
+
// • sessionType — meta.type the launcher should set
|
|
1264
|
+
// • binary — canonical command name; client matches `^binary\b` (i)
|
|
1265
|
+
// • costBand — 'free' | 'pay-per-token' | 'subscription' (Sprint 46
|
|
1266
|
+
// surfaces this in PLANNING.md cost annotations)
|
|
1267
|
+
// Functions / RegExps are NOT serialized — match logic lives client-side
|
|
1268
|
+
// and uses the binary as the prefix anchor. Adapter-specific shorthand
|
|
1269
|
+
// (e.g. `cc` → `claude`) is normalized in app.js before this lookup.
|
|
1270
|
+
app.get('/api/agent-adapters', (req, res) => {
|
|
1271
|
+
const list = Object.values(AGENT_ADAPTERS).map((a) => ({
|
|
1272
|
+
name: a.name,
|
|
1273
|
+
sessionType: a.sessionType,
|
|
1274
|
+
binary: a.spawn && a.spawn.binary,
|
|
1275
|
+
costBand: a.costBand,
|
|
1276
|
+
}));
|
|
1277
|
+
res.json(list);
|
|
1278
|
+
});
|
|
1279
|
+
|
|
1247
1280
|
// Public-shape helper so GET and PATCH return the same envelope.
|
|
1248
1281
|
function publicConfigPayload() {
|
|
1249
1282
|
return {
|
|
@@ -1650,10 +1683,16 @@ function createServer(config) {
|
|
|
1650
1683
|
if (!pool) return res.json({ enabled: false });
|
|
1651
1684
|
|
|
1652
1685
|
try {
|
|
1686
|
+
// Sprint 45 side-task 2 — order by COALESCE(started_at, completed_at) so
|
|
1687
|
+
// jobs whose upstream writer (the @jhizzard/rumen createJob INSERT in the
|
|
1688
|
+
// Edge Function) leaves started_at NULL still surface as "latest" via
|
|
1689
|
+
// their populated completed_at. Pre-fix the query returned a 2026-04-16
|
|
1690
|
+
// job permanently because that was the last row to have started_at
|
|
1691
|
+
// populated — every subsequent insert lands started_at = NULL.
|
|
1653
1692
|
const jobSql =
|
|
1654
1693
|
`SELECT id, status, completed_at, sessions_processed, insights_generated
|
|
1655
1694
|
FROM rumen_jobs
|
|
1656
|
-
ORDER BY started_at DESC
|
|
1695
|
+
ORDER BY COALESCE(started_at, completed_at) DESC NULLS LAST
|
|
1657
1696
|
LIMIT 1`;
|
|
1658
1697
|
const insightSql =
|
|
1659
1698
|
`SELECT
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
// Sprint 45 side-task — DNS-resilience policy for the rumen pg.Pool.
|
|
2
|
+
//
|
|
3
|
+
// Two factories, both DI-friendly so tests can stub dns + console:
|
|
4
|
+
//
|
|
5
|
+
// createCachedLookup(dnsModule, opts)
|
|
6
|
+
// Returns a (hostname, options, callback) function suitable for
|
|
7
|
+
// pg.Pool's `lookup` config. Caches successful lookups for
|
|
8
|
+
// `cacheTtlMs` (default 30s). On lookup failure, retries with
|
|
9
|
+
// jittered exponential backoff up to `backoffCapsMs.length`
|
|
10
|
+
// attempts (default [100, 500, 2000, 5000]). If every retry fails
|
|
11
|
+
// and a stale cached address exists, serves stale rather than
|
|
12
|
+
// failing — DNS flickers shouldn't tear the pool down.
|
|
13
|
+
//
|
|
14
|
+
// createFailureLogger(consoleModule, opts)
|
|
15
|
+
// Returns { logFailure, logRecovery } closures owning a private
|
|
16
|
+
// failure-window state. First failure logs `warn`; consecutive
|
|
17
|
+
// failures within `windowMs` (default 60s) downgrade to `debug`;
|
|
18
|
+
// a recovery after any prior failure logs `info` once and clears
|
|
19
|
+
// the window. Idempotent recovery (no failures pending) is silent.
|
|
20
|
+
//
|
|
21
|
+
// Both factories are pure — no module-scope state, no side effects on
|
|
22
|
+
// require — so tests can construct fresh instances per case.
|
|
23
|
+
|
|
24
|
+
'use strict';
|
|
25
|
+
|
|
26
|
+
// Per-retry delay caps (ms); the number of entries is the number of retries.
const DEFAULT_BACKOFF_CAPS_MS = [100, 500, 2000, 5000];
// How long a successful lookup result stays fresh (ms).
const DEFAULT_DNS_CACHE_TTL_MS = 30_000;
// Failures closer together than this (ms) are logged at the quieter level.
const DEFAULT_FAILURE_WINDOW_MS = 60_000;

// Picks a whole-millisecond delay between 50% and 100% of `capMs`.
// `rng` is called exactly once and is expected to return a value in [0, 1).
function _jitter(capMs, rng) {
  const fraction = 0.5 + rng() * 0.5;
  return Math.floor(capMs * fraction);
}
|
|
33
|
+
|
|
34
|
+
// Derives the cache key for one lookup. Besides the hostname it includes the
// options that change what a dns.lookup-style resolver returns (address
// family, `all`, hints, ordering), so a result resolved under one set of
// options is never served to a caller that asked for another.
function _lookupCacheKey(hostname, options) {
  // A bare number in the options position is shorthand for the family.
  const o = typeof options === 'number' ? { family: options } : (options || {});
  return JSON.stringify([
    hostname,
    o.family ?? 0,
    Boolean(o.all),
    o.hints ?? 0,
    o.verbatim ?? null,
    o.order ?? null,
  ]);
}

// createCachedLookup — wraps `dnsModule.lookup` with a TTL cache, jittered
// retry, and stale-on-failure.
//
//   dnsModule  object exposing `lookup(hostname, options, callback)`.
//   opts       optional overrides: `cacheTtlMs`, `backoffCapsMs`, and the
//              injectable `setTimeout`, `now`, `random` used by tests.
//   returns    `(hostname, options, callback)`; `options` may be omitted.
//
// Behavior of the returned function:
//   • a fresh cache entry for the same hostname + options is answered
//     immediately (the callback is invoked synchronously);
//   • otherwise the real lookup runs, and a success is cached for
//     `cacheTtlMs`;
//   • a failure is retried once per entry in `backoffCapsMs`, each after a
//     jittered delay;
//   • when every retry fails, an expired entry for the same key is served if
//     one existed when the call started, else the last error is reported.
function createCachedLookup(dnsModule, opts = {}) {
  const cacheTtlMs = opts.cacheTtlMs ?? DEFAULT_DNS_CACHE_TTL_MS;
  const backoffCapsMs = opts.backoffCapsMs ?? DEFAULT_BACKOFF_CAPS_MS;
  const setTimeoutFn = opts.setTimeout ?? setTimeout;
  const now = opts.now ?? Date.now;
  const rng = opts.random ?? Math.random;
  const cache = new Map();

  return function cachedLookup(hostname, options, callback) {
    // Support the two-argument form `(hostname, callback)`.
    if (typeof options === 'function') { callback = options; options = {}; }
    const t = now();
    const key = _lookupCacheKey(hostname, options);
    const hit = cache.get(key);
    if (hit && hit.expiresAt > t) {
      return callback(null, hit.address, hit.family);
    }
    let attempt = 0;
    const tryOnce = () => {
      dnsModule.lookup(hostname, options, (err, address, family) => {
        if (!err) {
          cache.set(key, { address, family, expiresAt: now() + cacheTtlMs });
          return callback(null, address, family);
        }
        if (attempt >= backoffCapsMs.length) {
          // Retries exhausted: prefer a stale answer over tearing down the
          // caller's connection attempt.
          if (hit) return callback(null, hit.address, hit.family);
          return callback(err);
        }
        const delay = _jitter(backoffCapsMs[attempt++], rng);
        setTimeoutFn(tryOnce, delay);
      });
    };
    tryOnce();
  };
}
|
|
67
|
+
|
|
68
|
+
// createFailureLogger — debounced failure / recovery logging.
//
//   consoleModule  object with `warn`, and optionally `debug` / `info`
//                  (each falls back to `log` when absent).
//   opts           optional `windowMs`, `prefix`, and an injectable `now`.
//   returns        { logFailure(message), logRecovery(message?), _state() }.
//
// The first failure logs at `warn`. A failure arriving less than `windowMs`
// after the previous one logs at `debug` with a running count. A recovery
// after at least one failure logs at `info` once and resets the state; a
// recovery with nothing pending is silent.
function createFailureLogger(consoleModule, opts = {}) {
  const windowMs = opts.windowMs ?? DEFAULT_FAILURE_WINDOW_MS;
  const prefix = opts.prefix ?? '[rumen]';
  const now = opts.now ?? Date.now;
  let firstAt = 0;
  let lastAt = 0;
  // `count` doubles as the "failure pending" flag. Timestamps are not used
  // for that purpose because an injected clock may legitimately return 0.
  let count = 0;

  function logFailure(message) {
    const t = now();
    if (count > 0 && (t - lastAt) < windowMs) {
      count += 1;
      lastAt = t;
      // Invoke on the receiver so loggers whose methods rely on `this` work.
      const debug = consoleModule.debug || consoleModule.log;
      debug.call(consoleModule, `${prefix} (debounced ${count}) ${message}`);
      return;
    }
    firstAt = t;
    lastAt = t;
    count = 1;
    consoleModule.warn(`${prefix} ${message}`);
  }

  function logRecovery(message) {
    if (count === 0) return;
    const info = consoleModule.info || consoleModule.log;
    info.call(consoleModule, `${prefix} recovered after ${count} failure(s)${message ? ` — ${message}` : ''}`);
    firstAt = 0;
    lastAt = 0;
    count = 0;
  }

  // Snapshot of the private counters, exposed for tests.
  function _state() { return { firstAt, lastAt, count }; }

  return { logFailure, logRecovery, _state };
}
|
|
104
|
+
|
|
105
|
+
module.exports = {
|
|
106
|
+
createCachedLookup,
|
|
107
|
+
createFailureLogger,
|
|
108
|
+
DEFAULT_BACKOFF_CAPS_MS,
|
|
109
|
+
DEFAULT_DNS_CACHE_TTL_MS,
|
|
110
|
+
DEFAULT_FAILURE_WINDOW_MS,
|
|
111
|
+
};
|