bridge-agent 0.1.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,925 @@
1
+ import WebSocket from 'ws';
2
+ import fs from 'fs';
3
+ import path from 'path';
4
+ import os from 'os';
5
+ import { spawnSync } from 'node:child_process';
6
+ import { createHash } from 'node:crypto';
7
+ import { detectAgents, AGENT_SPECS } from '../pty/agents.js';
8
+ import { loadConfig, loadProjectSettings } from '../config.js';
9
+ import { startClaudeUsageWatcher } from '../pty/claude-usage.js';
10
+ import { startClaudeQuotaWatcher } from '../pty/claude-quota.js';
11
+ import { startMetricsRelay } from '../metrics.js';
12
// Cleanup callbacks for the running Claude usage watchers, keyed by agentId.
const usageWatchers = new Map();
// Latest snapshot reported by the single, process-wide quota watcher. It is
// shared by every Claude panel and remains null until the watcher first reports.
let latestQuota = null;
const stopQuotaWatcher = startClaudeQuotaWatcher((info) => {
  latestQuota = info;
});
// Timing limits (milliseconds) and the output snippet cap (string length).
const KEEPALIVE_MS = 30 * 1000;
const EARLY_EXIT_MS = 5_000;
const OUTPUT_SNIPPET_MAX = 400;
20
/**
 * Remove ANSI CSI escape sequences (colours, cursor movement, mode switches,
 * bracketed-paste markers, ...) from terminal output.
 *
 * A CSI sequence is ESC `[`, then any number of parameter bytes (0x30-0x3F),
 * then any number of intermediate bytes (0x20-0x2F), then one final byte
 * (0x40-0x7E). Matching that whole grammar also removes sequences such as
 * `ESC[200~`, `ESC[>4;2m` and `ESC[2 q`, which a letters-only final byte and
 * a digits/`;`/`?` parameter set would leave behind.
 *
 * Other escape families (for example OSC title sequences) are not touched.
 *
 * @param {string} text Raw terminal output.
 * @returns {string} The text with every CSI sequence removed.
 */
function stripAnsi(text) {
  return text.replace(/\x1b\[[0-?]*[ -\/]*[@-~]/g, '');
}
23
/**
 * Shorten text to at most `max` UTF-16 code units, appending "..." when
 * anything was cut off.
 *
 * If the cut would land between the two halves of a surrogate pair, the
 * dangling high surrogate is dropped as well so the snippet never ends in
 * half a character.
 *
 * @param {string} text Text to shorten.
 * @param {number} [max=OUTPUT_SNIPPET_MAX] Maximum number of code units kept.
 * @returns {string} `text` unchanged when it fits, otherwise the kept prefix
 *   followed by "...".
 */
function clip(text, max = OUTPUT_SNIPPET_MAX) {
  if (text.length <= max) {
    return text;
  }
  let end = max;
  const lastKept = text.charCodeAt(end - 1);
  // 0xD800-0xDBFF is the high-surrogate range; keeping one without its low
  // surrogate would emit an unpaired code unit.
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
    end -= 1;
  }
  return `${text.slice(0, end)}...`;
}
26
/**
 * Turn the WebSocket server URL from the config into the matching HTTP base URL.
 *
 * The scheme is mapped `wss:` -> `https:` and `ws:` -> `http:`, and a trailing
 * `/ws` path (together with anything after it) is removed.
 */
function deriveServerUrl(wsUrl) {
  const toHttpScheme = (scheme) => (scheme === 'wss:' ? 'https:' : 'http:');
  const withHttpScheme = wsUrl.replace(/^wss?:/, toHttpScheme);
  return withHttpScheme.replace(/\/ws(\/.*)?$/, '');
}
32
/**
 * Resolve the path of the bridge-mcp executable.
 *
 * Candidates are probed in order and the first one that exists wins:
 *   1. `../../mcp-server/dist/index.cjs` next to the daemon (monorepo: packages/mcp-server)
 *   2. `bridge-mcp.cjs` in the daemon's own directory (prod bundle: same dist dir)
 *   3. `node_modules/.bin/bridge-mcp` under the current working directory
 * When none exists the bare command name `bridge-mcp` is returned.
 *
 * Failing to resolve the daemon script path only skips the two
 * daemon-relative candidates; it never throws, so the fallbacks still apply.
 *
 * @returns {string} Path to the MCP server entry point, or `'bridge-mcp'`.
 */
function resolveMcpBin() {
  const candidates = [];
  try {
    // process.argv[1] gives the daemon script path; realpathSync resolves symlinks.
    const daemonDir = path.dirname(fs.realpathSync(process.argv[1] ?? ''));
    candidates.push(
      path.resolve(daemonDir, '../../mcp-server/dist/index.cjs'),
      path.resolve(daemonDir, 'bridge-mcp.cjs'),
    );
  } catch {
    // The script path is missing or unreadable: fall through to the
    // location-independent candidates below.
  }
  candidates.push(path.resolve(process.cwd(), 'node_modules/.bin/bridge-mcp'));
  return candidates.find((candidate) => fs.existsSync(candidate)) ?? 'bridge-mcp';
}
44
/**
 * Write a temp MCP config file and return `--mcp-config` args for Claude Code.
 * Claude's --mcp-config flag is used because it works regardless of project
 * root detection.
 *
 * When BRIDGE_MCP_URL is set (production) an HTTP MCP transport config is
 * written; otherwise the config falls back to stdio transport (monorepo dev)
 * and launches the binary returned by resolveMcpBin().
 *
 * The file contains the bearer token, so it is created with mode 0o600 and
 * re-chmodded in case a file from an earlier run already exists.
 *
 * @param {object} ctx Spawn context; reads `workspaceId`, `projectId`,
 *   `token`, `serverUrl` and the optional `agentId`.
 * @returns {string[]} `['--mcp-config', <path>]`, or `[]` when the config
 *   could not be built or written (the failure is logged, not thrown).
 */
function buildMcpConfigArgs(ctx) {
  try {
    // Trailing slashes are dropped so the joined URL never contains "//mcp".
    const bridgeMcpUrl = process.env['BRIDGE_MCP_URL']?.replace(/\/+$/, '');
    const config = bridgeMcpUrl
      ? {
          mcpServers: {
            bridge: {
              type: 'http',
              url: `${bridgeMcpUrl}/mcp/${ctx.workspaceId}/${ctx.projectId}`,
              headers: {
                Authorization: `Bearer ${ctx.token}`,
                'x-panel-id': ctx.agentId ?? '',
              },
            },
          },
        }
      : {
          mcpServers: {
            bridge: {
              command: resolveMcpBin(),
              args: [],
              env: {
                BRIDGE_SERVER_URL: ctx.serverUrl,
                BRIDGE_TOKEN: ctx.token,
                BRIDGE_WORKSPACE_ID: ctx.workspaceId,
                BRIDGE_PROJECT_ID: ctx.projectId,
                BRIDGE_PANEL_ID: ctx.agentId ?? '',
                HTTP_MODE: 'false',
              },
            },
          },
        };
    const tmpPath = path.join(os.tmpdir(), `bridge-mcp-${ctx.agentId ?? ctx.projectId}.json`);
    fs.writeFileSync(tmpPath, `${JSON.stringify(config, null, 2)}\n`, { encoding: 'utf-8', mode: 0o600 });
    // `mode` only applies when the file is created; tighten a leftover file too.
    fs.chmodSync(tmpPath, 0o600);
    console.log('[daemon] mcp.config.written', { tmpPath, transport: bridgeMcpUrl ? 'http' : 'stdio' });
    return ['--mcp-config', tmpPath];
  } catch (err) {
    console.warn('[daemon] mcp.config.build.failed', { error: String(err) });
    return [];
  }
}
93
// ── Bridge tool registry ─────────────────────────────────────────────────────
// Single source of truth for all bridge_* MCP tool descriptions.
// Add new tools here — role prompts compose from this map.
// Nothing checks tool names at build time in this JavaScript module, so the
// map is frozen: it can only be changed by editing this literal.
const BRIDGE_TOOL_DOCS = Object.freeze({
  // Project / plan
  bridge_get_project: 'Project metadata: name, cwd, machineId',
  bridge_get_plan: 'Read project spec/description',
  bridge_update_plan: 'Update project spec/description',
  bridge_get_project_history: 'Past run history and failure patterns',
  bridge_get_execution_status: 'Run history with todo completion counts',
  // Todos
  bridge_get_todos: 'List todos + session state for this project',
  bridge_add_todo: 'Create a new todo (title, todoType, dependsOn)',
  bridge_update_todo: 'Update a todo title or status',
  bridge_cancel_run: 'Cancel active run (use before restarting a stale plan)',
  // Panel management
  bridge_list_agents: 'All agents: role, status, inRun flag',
  bridge_get_agent_status: 'Single agent status check',
  bridge_spawn_worker: 'Spawn a new worker agent (agentKey, role)',
  bridge_kill_agent: 'Terminate a stuck or dead agent',
  bridge_get_agent_output: 'Read terminal output of any agent',
  bridge_send_input: 'Send text input to an agent PTY',
  // Worker task lifecycle
  bridge_get_my_task: 'Get the task assigned to this agent',
  bridge_complete_task: 'Signal task completion',
  bridge_fail_task: 'Signal task failure with a specific reason',
  bridge_get_todo_context: 'Read todo output/error for a specific todo',
  bridge_assign_task: 'Assign a pending todo to a specific agent',
});
124
/**
 * Build the inline footer that names the MCP tools available to a role.
 * The result starts with a blank line, then the bold "Available MCP tools:"
 * label and the given tool names joined with ", ".
 */
function toolRef(...tools) {
  const names = tools.join(', ');
  return `\n\n**Available MCP tools:** ${names}`;
}
128
/**
 * Build the Markdown table footer used as the orchestrator-style full tool
 * reference: a "Tool reference" heading followed by one `| tool | purpose |`
 * row per requested tool, with the purpose taken from BRIDGE_TOOL_DOCS.
 *
 * A name that is not an own key of BRIDGE_TOOL_DOCS is logged and rendered
 * with a placeholder, so the prompt never contains the text "undefined" or
 * an inherited Object.prototype member.
 *
 * @param {...string} tools Tool names, in the order the rows should appear.
 * @returns {string} The Markdown fragment, starting with a blank line.
 */
function toolTable(...tools) {
  const rows = tools.map((tool) => {
    const isKnown = Object.hasOwn(BRIDGE_TOOL_DOCS, tool);
    if (!isKnown) {
      console.warn('[daemon] tool.docs.missing', { tool });
    }
    const purpose = isKnown ? BRIDGE_TOOL_DOCS[tool] : '(no description available)';
    return `| \`${tool}\` | ${purpose} |`;
  });
  const header = '\n\n## Tool reference (only call tools listed here)\n\n| Tool | Purpose |\n|------|---------|';
  return `${header}\n${rows.join('\n')}`;
}
134
+ // ── Role system prompt injection ─────────────────────────────────────────────
135
+ const ROLE_SYSTEM_PROMPTS = {
136
+ developer: `# Bridge Worker — Developer Role
137
+
138
+ You are a **Developer** worker in a multi-agent orchestration system called Bridge.
139
+
140
+ **Your responsibilities:**
141
+ - Implement assigned tasks completely and correctly — no stubs, no TODOs
142
+ - Work inside the project working directory
143
+ - Run existing tests after changes and fix any failures
144
+ - Read dependency outputs with \`bridge_get_todo_context\` before starting a task
145
+ - Signal completion with \`bridge_complete_task\`, failure with \`bridge_fail_task\` + reason
146
+ - After making changes, check the runner agent (role:'runner' in bridge_list_agents) for build errors: bridge_get_agent_output(runnerAgentId)
147
+ - Trigger hot reload after file changes: bridge_send_input(runnerAgentId, "r")
148
+
149
+ ${toolRef('bridge_get_my_task', 'bridge_complete_task', 'bridge_fail_task', 'bridge_get_todo_context', 'bridge_get_todos', 'bridge_list_agents', 'bridge_get_agent_output', 'bridge_send_input')}`,
150
+ reviewer: `# Bridge Worker — Reviewer Role
151
+
152
+ You are a **Quality-Obsessed Tech Lead** reviewing code changes in a multi-agent system called Bridge.
153
+ Your identity: Agile, pragmatic, anti-fragile. You ship with confidence or you send it back.
154
+
155
+ ---
156
+
157
+ ## Workflow
158
+
159
+ ### Step 1 — Load context
160
+ 1. Call \`bridge_get_my_task\` — understand what this review covers
161
+ 2. Call \`bridge_get_todo_context\` on ALL dependency task IDs — read what the developer produced
162
+ 3. Read the actual changed files in the codebase (Glob, Grep, Read)
163
+
164
+ ### Step 2 — Pareto scan (do this first)
165
+ Identify the 20% of changes that carry 80% of the risk:
166
+ - New external interfaces (API endpoints, public functions, exports)
167
+ - State mutations (DB writes, file I/O, global state)
168
+ - Error handling paths and fallbacks
169
+ - Auth, validation, and input boundaries
170
+ Focus your deep review on these. Skim the rest.
171
+
172
+ ### Step 3 — Review lenses (apply all, in order)
173
+
174
+ **Principles (KISS · DRY · SOLID · YAGNI)**
175
+ - Is the solution simpler than it needs to be, or over-engineered?
176
+ - Is logic duplicated that should be shared?
177
+ - Are responsibilities clearly separated (single responsibility)?
178
+ - Is anything implemented "for the future" with no current use?
179
+
180
+ **Chaos Engineering lens**
181
+ - What happens when a dependency (DB, API, file system) is unavailable?
182
+ - What happens under partial failure — does the system leave inconsistent state?
183
+ - Are retries safe? Is idempotency guaranteed for mutations?
184
+ - Are resources (connections, file handles, timers) properly disposed on failure paths?
185
+
186
+ **Safety & correctness**
187
+ - Fail fast: are invalid states caught at entry points, not deep in logic?
188
+ - Strict types: no implicit any, no unchecked casts, no dynamic keys without guards
189
+ - Are all async paths awaited? Are race conditions possible?
190
+ - Edge cases: empty input, null/undefined, zero, max values, concurrent calls
191
+
192
+ **Security**
193
+ - Assume all external input is malicious — is it sanitized before use?
194
+ - SQL/command/template injection vectors?
195
+ - Are secrets never logged or exposed in error messages?
196
+ - Auth checks before data access, not after?
197
+
198
+ **Observability**
199
+ - Does every failure path emit a structured log with enough context to debug?
200
+ - Are errors surfaced to the caller or silently swallowed?
201
+ - Is there a way to trace what happened without a debugger?
202
+
203
+ ### Step 4 — Follow the dependency chain
204
+ - Pull the output of each dependency todo via \`bridge_get_todo_context\`
205
+ - Verify the developer actually used the context from prior tasks correctly
206
+ - Check that interfaces between tasks are consistent (types match, contracts hold)
207
+
208
+ ### Step 5 — Compile & runtime check
209
+ Detect the stack and run the appropriate compile/typecheck/lint/test commands.
210
+ Do NOT approve if any check fails. Do NOT skip this step.
211
+ If a runner agent exists (bridge_list_agents → role:'runner'), call bridge_get_agent_output to verify the app still builds and runs after changes.
212
+
213
+ ### Step 6 — Verdict
214
+
215
+ **Approve** (\`bridge_complete_task\`) only when:
216
+ - All lenses pass or issues are trivial cosmetic nits
217
+ - Compile check is clean
218
+
219
+ **Reject** (\`bridge_fail_task\`) with a specific, actionable message:
220
+ - Quote the file + line number
221
+ - State what is wrong and why
222
+ - State the approach to fix it — not the exact code, but the direction (e.g. "validate before accessing, not after" not "write this exact line")
223
+ - Do NOT reject for style preferences — only for correctness, safety, resilience, or security issues
224
+
225
+ **Retry limit:** If the same issue persists after 2 retries, approve with a documented caveat in your completion message rather than blocking indefinitely.
226
+
227
+ ---
228
+
229
+ ## Rules
230
+ - Never fix the code yourself — only review and report
231
+ - One \`bridge_fail_task\` per review cycle — consolidate all issues into a single message
232
+ - If unsure whether something is a bug or intentional design: flag it as a question, don't reject
233
+
234
+ ${toolRef('bridge_get_my_task', 'bridge_complete_task', 'bridge_fail_task', 'bridge_get_todo_context', 'bridge_get_todos', 'bridge_list_agents', 'bridge_get_agent_output')}`,
235
+ planner: `# Bridge Worker — Planner Role
236
+
237
+ You are a **Planner** in Bridge. Your job: understand the project, listen to the user, then create a well-structured and verified execution plan.
238
+
239
+ ---
240
+
241
+ ## Workflow
242
+
243
+ ### Phase 1 — Load context
244
+ Call all three tools (can be parallel):
245
+ 1. \`bridge_get_plan\` — project spec and goals
246
+ 2. \`bridge_get_project_history\` — past runs, successes, failures
247
+ 3. \`bridge_get_todos\` — currently open todos
248
+
249
+ Then ask the user what they want to work on. Wait for their answer.
250
+
251
+ ### Phase 1.5 — Ambiguity check (after user responds, before planning)
252
+
253
+ Evaluate the user's task against these criteria:
254
+
255
+ **CLEAR — skip to Phase 2 immediately if ALL of these hold:**
256
+ - A specific component, file, endpoint, or UI element is named
257
+ - The outcome is observable (passes tests, renders on page, endpoint returns X)
258
+ - No vague scope verbs without a target: "improve", "refactor", "optimize", "clean up"
259
+
260
+ **AMBIGUOUS — ask ONE targeted question if any of these apply:**
261
+ - Multiple layers could be the target (server vs daemon vs web UI)
262
+ - Success criteria are unclear (what does "faster" or "better" mean here?)
263
+ - A named tool/library is requested but its scope is open (e.g. "add Sentry" — errors only? performance? which layer?)
264
+
265
+ **How to ask (if needed):**
266
+ - Forced-choice format: "Are we targeting (a) [X] or (b) [Y]?"
267
+ - Include concrete options drawn from the project context you just loaded
268
+ - Do NOT ask: "What exactly do you mean?" — too open, wastes a turn
269
+ - Do NOT ask multiple questions at once
270
+
271
+ **After the user's ONE clarifying response:**
272
+ - Proceed to Phase 2 immediately — no more questions
273
+ - If still unclear, state your assumption explicitly: "I'll proceed assuming [X]. Let me know if that's wrong."
274
+
275
+ ### Phase 2 — Plan & create todos (triggered after user specifies the task)
276
+
277
+ **Step A — False positive check (MANDATORY)**
278
+ For any feature or area the user mentions that appears "completed" in history:
279
+ - Search the codebase (Glob, Grep, Read) to confirm it actually exists in code
280
+ - If "completed" but missing from code → it needs a new todo, note the discrepancy
281
+ - If a pending todo is already fully implemented → close it: \`bridge_complete_task\` with that todo's ID
282
+ Past runs can lie. Always verify before trusting history.
283
+
284
+ **Step B — Gap analysis (MANDATORY)**
285
+ For the scope the user requested, compare plan goals vs verified-done vs open todos:
286
+ - ✅ Done (verified in Step A)
287
+ - 🔄 In progress (open todos)
288
+ - ❌ Missing (in plan, no todo, not implemented)
289
+ Show this to the user before creating anything.
290
+
291
+ **Step C — Confirm scope**
292
+ Based on the gap analysis, confirm with the user exactly what to create todos for.
293
+ Do NOT create todos before this confirmation.
294
+
295
+ **Step D — Create todos**
296
+ For each subtask (3–10 todos):
297
+ - Call \`bridge_add_todo\` with: title, description, todoType, dependsOn
298
+ - **Do not set estimatedAgent** — it is set automatically from todoType (\`infra\` → \`sh\`, others → \`claude\`)
299
+ - **todoType determines who does the work:**
300
+ - \`implementation\` → developer worker
301
+ - \`review\` → reviewer worker (validation, QA, sign-off)
302
+ - \`infra\` → infra/shell worker (migrations, scripts, CI)
303
+ - \`planning\` → meta tasks (specs, design decisions)
304
+ - **description**: include relevant file paths, expected inputs/outputs, what the worker needs from prior todos — workers only see title + description
305
+ - **dependsOn**: set when a task needs a prior task's output; omit for parallel tasks
306
+
307
+ **Step E — Dependency chain validation (MANDATORY)**
308
+ After all todos are created:
309
+ - Identify all leaf todos (nothing else depends on them)
310
+ - Every leaf must be covered by a \`review\` todo that depends on it
311
+ - If any leaf is uncovered → add a review todo now
312
+ - Verify: no circular dependencies, no orphaned chains
313
+
314
+ **Step F — Final summary**
315
+ Show the complete todo list with types and dependency chain. Then stop.
316
+
317
+ ---
318
+
319
+ ## Rules
320
+ - Never implement anything yourself
321
+ - You MAY read the codebase during Steps A–B — this is required, not optional
322
+ - To close a stale open todo that's already done: \`bridge_complete_task\` with its ID
323
+ - Todo titles must be specific: name the files, endpoints, components — no vague verbs
324
+
325
+ ${toolRef('bridge_get_plan', 'bridge_get_project_history', 'bridge_get_todos', 'bridge_add_todo', 'bridge_complete_task', 'bridge_fail_task')}`,
326
+ executor: `# Bridge Worker — Executor Role
327
+
328
+ You are an **Executor** worker in a multi-agent orchestration system called Bridge.
329
+
330
+ **Your responsibilities:**
331
+ - Run the specified commands, scripts, or CLI tools exactly as described in the task
332
+ - Use \`bridge_get_todo_context\` to fetch artefacts from dependencies (file paths, config, etc.)
333
+ - Capture and report all relevant output
334
+ - Call \`bridge_complete_task\` on success, \`bridge_fail_task\` with error details on failure
335
+
336
+ ${toolRef('bridge_get_my_task', 'bridge_complete_task', 'bridge_fail_task', 'bridge_get_todo_context', 'bridge_list_agents', 'bridge_get_agent_output', 'bridge_send_input')}`,
337
+ shell: `# Bridge Worker — Shell Role
338
+
339
+ You are a **Shell** worker in a multi-agent orchestration system called Bridge.
340
+
341
+ **Your responsibilities:**
342
+ - Execute shell commands given in each task title directly and faithfully
343
+ - Do not modify, interpret, or add to the command unless it clearly contains a typo
344
+ - Call \`bridge_complete_task\` when the command exits cleanly
345
+ - Call \`bridge_fail_task\` with the error output if the command fails
346
+
347
+ ${toolRef('bridge_get_my_task', 'bridge_complete_task', 'bridge_fail_task')}`,
348
+ orchestrator: `# Bridge Orchestrator
349
+
350
+ You are a **Bridge Orchestrator**. Your sole purpose is to decompose specs into todos, delegate them to worker agents, and report results. Nothing else.
351
+
352
+ ---
353
+
354
+ ## Identity constraints (absolute)
355
+
356
+ - Do NOT answer questions, browse files, run bash, or use mem0
357
+ - Do NOT call any tool not listed in the tool reference below — if a tool isn't listed, it does not exist
358
+ - Do NOT ask "should I start?" / "shall I proceed?" — if you have work and workers, act
359
+ - If a user asks you to do something outside orchestration, respond: "I'm an orchestrator. Give me a task spec and I'll delegate it to workers."
360
+
361
+ **Your own agent ID: \`{{PANEL_ID}}\`**
362
+ Never call \`bridge_kill_agent\` with this ID — killing yourself terminates the orchestration session.
363
+
364
+ ---
365
+
366
+ ## Workflow
367
+
368
+ ### Step 1 — Orient
369
+ Call these in parallel:
370
+ - \`bridge_get_project\` — project name, cwd, machineId
371
+ - \`bridge_get_todos\` — open todos and current session state
372
+ - \`bridge_list_agents\` — worker availability (role, status, inRun)
373
+
374
+ **Decision after Step 1:**
375
+ - If todos exist AND idle workers available → go directly to Step 4 (assign now, no re-planning)
376
+ - If todos exist but no workers → go to Step 4 (spawn first)
377
+ - If no todos exist → go to Step 2
378
+ - If session is stale or wrong → call \`bridge_cancel_run\`, then go to Step 2
379
+
380
+ ### Step 2 — Clarify (if needed)
381
+ If the spec is genuinely ambiguous, ask ONE question. Otherwise skip.
382
+
383
+ ### Step 3 — Plan
384
+ Create 3–10 todos with \`bridge_add_todo\`:
385
+ - \`todoType\`: \`implementation\` → developer, \`review\` → reviewer, \`infra\` → shell/executor
386
+ - \`dependsOn\`: list todo IDs that must complete first
387
+ - Every implementation block must have a \`review\` todo depending on it
388
+ - Descriptions: include file paths, expected inputs/outputs, relevant context
389
+
390
+ Verify plan with \`bridge_get_todos\` before proceeding.
391
+
392
+ ### Step 4 — Spawn workers if needed
393
+ \`bridge_list_agents\` — check for idle agents first (previous sessions may have them).
394
+ Missing a required role → \`bridge_spawn_worker\` (role: developer, reviewer, shell, executor).
395
+
396
+ ### Step 5 — Assign initial work
397
+ For each idle agent matching a needed role:
398
+ - Get the todo ID from \`bridge_list_agents\` (assignedTodo field shows what's already running)
399
+ - \`bridge_assign_task\` — pin the todo to the agent (\`inRun:false\` agents are valid targets)
400
+ - Server dispatches remaining todos automatically as workers complete — you do NOT need to manually assign every todo
401
+
402
+ After assigning, call \`bridge_get_execution_status\` and report current progress to the user:
403
+ - How many todos are running / done / total
404
+ - Which workers are active
405
+
406
+ Then go idle. The server handles dispatch automatically.
407
+
408
+ ### Step 6 — Monitor (only when user explicitly requests it)
409
+
410
+ **Default behavior: stop after Step 5 and go idle.**
411
+
412
+ Only enter this step if the user explicitly asks to "monitor", "watch this", "drive to completion", or similar.
413
+
414
+ **Bounded monitoring loop — maximum 5 poll iterations:**
415
+
416
+ Each iteration:
417
+ 1. \`bridge_get_execution_status\`
418
+ - run status = \`completed\` → DONE: go to Step 7
419
+ - run status = \`circuit_broken\` → all retries exhausted: go to Step 7 with failure summary
420
+ - done = total → DONE: go to Step 7
421
+ 2. \`bridge_list_agents\`
422
+ - Any agent \`status: busy\` → work in progress, continue to next iteration
423
+ - **ALL agents idle + done < total = DEADLOCK:**
424
+ - \`bridge_get_agent_output\` on recently-idle agents to diagnose
425
+ - Missing a required role? → \`bridge_spawn_worker\` **once** (do NOT spawn repeatedly)
426
+ - Report to user: what is stuck and why. Exit loop.
427
+ 3. Continue to next iteration (max 5 total)
428
+
429
+ After 5 iterations: report current \`bridge_get_execution_status\` snapshot and go idle. Tell the user to re-engage you for another status check.
430
+
431
+ **Hard limits in monitoring mode:**
432
+ - Max 5 poll iterations — never exceed this
433
+ - Max 1 \`bridge_spawn_worker\` call per deadlock — never spawn repeatedly
434
+ - If \`bridge_get_execution_status\` returns an error → exit loop, report to user
435
+
436
+ ### Step 7 — Wrap up
437
+ Summarize: todos completed, failed, any blockers. List what workers did.
438
+
439
+ ---
440
+
441
+ ${toolTable('bridge_get_project', 'bridge_get_plan', 'bridge_get_todos', 'bridge_add_todo', 'bridge_cancel_run', 'bridge_get_project_history', 'bridge_get_execution_status', 'bridge_list_agents', 'bridge_get_agent_status', 'bridge_spawn_worker', 'bridge_assign_task', 'bridge_get_todo_context', 'bridge_get_agent_output', 'bridge_kill_agent')}`,
442
+ };
443
/**
 * Build the CLI args that inject a role system prompt into an agent.
 *
 * - claude: the prompt is written to `bridge-role-<agentId>.md` in the temp
 *   dir and passed as `--append-system-prompt-file <path>`
 * - kimi: the prompt is written to `bridge-role-kimi-<agentId>.md` and passed
 *   as `--system-prompt-file <path>`
 * - qwen: the prompt text is passed inline via `--append-system-prompt`
 *
 * The `{{PANEL_ID}}` placeholder is replaced with `agentId` for every agent,
 * including the inline qwen form.
 *
 * @param {string} agentKey Agent CLI key ('claude', 'qwen', 'kimi', ...).
 * @param {string} [role] Key into ROLE_SYSTEM_PROMPTS.
 * @param {string} agentId Panel/agent id used for the temp file name and the
 *   placeholder.
 * @returns {string[]} The args, or `[]` when there is no role, the role is
 *   unknown, the agent has no prompt flag, or the prompt file cannot be
 *   written (the failure is logged, not thrown).
 */
function buildRolePromptArgs(agentKey, role, agentId) {
  // Own-key check: a role such as "constructor" must not resolve to an
  // inherited Object.prototype member.
  if (!role || !Object.hasOwn(ROLE_SYSTEM_PROMPTS, role)) {
    return [];
  }
  const content = ROLE_SYSTEM_PROMPTS[role];
  if (!content) {
    return [];
  }
  // A replacer function keeps "$" sequences in agentId from being read as
  // replacement patterns.
  const resolved = content.replaceAll('{{PANEL_ID}}', () => String(agentId));

  if (agentKey === 'qwen') {
    // Qwen supports --append-system-prompt as a CLI flag
    return ['--append-system-prompt', resolved];
  }

  let target;
  if (agentKey === 'claude') {
    target = {
      fileName: `bridge-role-${agentId}.md`,
      flag: '--append-system-prompt-file',
      logPrefix: '[daemon] role.prompt',
    };
  } else if (agentKey === 'kimi') {
    // Kimi takes its system prompt from a temp file
    target = {
      fileName: `bridge-role-kimi-${agentId}.md`,
      flag: '--system-prompt-file',
      logPrefix: '[daemon] kimi.role.prompt',
    };
  } else {
    return [];
  }

  try {
    const tmpPath = path.join(os.tmpdir(), target.fileName);
    fs.writeFileSync(tmpPath, `${resolved}\n`, 'utf-8');
    console.log(`${target.logPrefix}.written`, { agentId, role, tmpPath });
    return [target.flag, tmpPath];
  } catch (err) {
    console.warn(`${target.logPrefix}.write.failed`, { agentId, role, error: String(err) });
    return [];
  }
}
488
/**
 * Encode a value as a TOML basic (double-quoted) string.
 *
 * Backslash and double quote are escaped, and so is every control character
 * (U+0000-U+001F and U+007F), using the short escapes TOML defines
 * (\b \t \n \f \r) or \uXXXX otherwise. A raw newline inside the quotes would
 * not be a valid basic string.
 *
 * @param {string} v Text to encode.
 * @returns {string} The quoted, escaped TOML string literal.
 */
function toTomlString(v) {
  const escaped = v.replace(/[\\"\u0000-\u001f\u007f]/g, (ch) => {
    switch (ch) {
      case '\\':
        return '\\\\';
      case '"':
        return '\\"';
      case '\b':
        return '\\b';
      case '\t':
        return '\\t';
      case '\n':
        return '\\n';
      case '\f':
        return '\\f';
      case '\r':
        return '\\r';
      default:
        return `\\u${ch.charCodeAt(0).toString(16).toUpperCase().padStart(4, '0')}`;
    }
  });
  return `"${escaped}"`;
}
491
/**
 * Write a temp MCP config file for Kimi and return `--mcp-config-file` args.
 *
 * When BRIDGE_MCP_URL is set an HTTP MCP transport config is written;
 * otherwise the config uses stdio transport and launches the binary returned
 * by resolveMcpBin().
 *
 * The file contains the bearer token, so it is created with mode 0o600 and
 * re-chmodded in case a file from an earlier run already exists.
 *
 * @param {object} ctx Spawn context; reads `workspaceId`, `projectId`,
 *   `token`, `serverUrl` and the optional `agentId`.
 * @returns {string[]} `['--mcp-config-file', <path>]`, or `[]` when the
 *   config could not be built or written (the failure is logged, not thrown).
 */
function buildKimiMcpConfigArgs(ctx) {
  try {
    // Trailing slashes are dropped so the joined URL never contains "//mcp".
    const bridgeMcpUrl = process.env['BRIDGE_MCP_URL']?.replace(/\/+$/, '');
    const config = bridgeMcpUrl
      ? {
          mcpServers: {
            bridge: {
              type: 'http',
              url: `${bridgeMcpUrl}/mcp/${ctx.workspaceId}/${ctx.projectId}`,
              headers: {
                Authorization: `Bearer ${ctx.token}`,
                'x-panel-id': ctx.agentId ?? '',
              },
            },
          },
        }
      : {
          mcpServers: {
            bridge: {
              command: resolveMcpBin(),
              args: [],
              env: {
                BRIDGE_SERVER_URL: ctx.serverUrl,
                BRIDGE_TOKEN: ctx.token,
                BRIDGE_WORKSPACE_ID: ctx.workspaceId,
                BRIDGE_PROJECT_ID: ctx.projectId,
                BRIDGE_PANEL_ID: ctx.agentId ?? '',
                HTTP_MODE: 'false',
              },
            },
          },
        };
    const tmpPath = path.join(os.tmpdir(), `bridge-mcp-kimi-${ctx.agentId ?? ctx.projectId}.json`);
    fs.writeFileSync(tmpPath, `${JSON.stringify(config, null, 2)}\n`, { encoding: 'utf-8', mode: 0o600 });
    // `mode` only applies when the file is created; tighten a leftover file too.
    fs.chmodSync(tmpPath, 0o600);
    console.log('[daemon] kimi.mcp.config.written', { tmpPath, transport: bridgeMcpUrl ? 'http' : 'stdio' });
    return ['--mcp-config-file', tmpPath];
  } catch (err) {
    console.warn('[daemon] kimi.mcp.config.build.failed', { error: String(err) });
    return [];
  }
}
533
/**
 * Build the `-c key=value` overrides that register the bridge MCP server
 * (stdio transport) with Codex.
 *
 * The server environment carries the same variables the other agents'
 * MCP setups pass, including BRIDGE_PANEL_ID.
 *
 * NOTE(review): the token ends up in the agent's argv and is presumably
 * visible to other local users via process listings — confirm whether Codex
 * offers a file-based alternative.
 *
 * @param {object} ctx Spawn context; reads `serverUrl`, `token`,
 *   `workspaceId`, `projectId` and the optional `agentId`.
 * @returns {string[]} Alternating `-c` flags and TOML `key=value` overrides.
 */
function buildCodexMcpConfigArgs(ctx) {
  const mcpBin = resolveMcpBin();
  const env = [
    ['BRIDGE_SERVER_URL', ctx.serverUrl],
    ['BRIDGE_TOKEN', ctx.token],
    ['BRIDGE_WORKSPACE_ID', ctx.workspaceId],
    ['BRIDGE_PROJECT_ID', ctx.projectId],
    ['BRIDGE_PANEL_ID', ctx.agentId ?? ''],
    ['HTTP_MODE', 'false'],
  ];
  // TOML inline table: {KEY="value",KEY2="value2"}
  const envInline = `{${env.map(([key, value]) => `${key}=${toTomlString(value)}`).join(',')}}`;
  return [
    '-c', 'mcp_servers.bridge.transport="stdio"',
    '-c', `mcp_servers.bridge.command=${toTomlString(mcpBin)}`,
    '-c', 'mcp_servers.bridge.args=[]',
    '-c', `mcp_servers.bridge.env=${envInline}`,
  ];
}
543
/**
 * Register the bridge MCP server with the Qwen CLI at project scope by
 * running `qwen mcp remove` followed by `qwen mcp add` in the project
 * directory.
 *
 * @param {object} ctx Spawn context; reads `cwd`, `serverUrl`, `token`,
 *   `workspaceId`, `projectId` and the optional `agentId`.
 * @returns {boolean} true when `qwen mcp add` exited with status 0; false
 *   when `cwd` is missing or the command failed (both cases are logged).
 */
function ensureQwenProjectMcp(ctx) {
  if (!ctx.cwd) {
    console.warn('[daemon] qwen.mcp.setup.skipped', { reason: 'missing_cwd', projectId: ctx.projectId });
    return false;
  }
  const mcpBin = resolveMcpBin();
  const common = {
    cwd: ctx.cwd,
    encoding: 'utf-8',
    timeout: 5000,
    stdio: 'pipe',
  };
  // Idempotent reset to avoid stale env from prior projects. The result is
  // deliberately ignored: the entry may simply not exist yet.
  spawnSync('qwen', ['mcp', 'remove', '--scope', 'project', 'bridge'], common);
  const add = spawnSync('qwen', [
    'mcp', 'add', '--scope', 'project',
    '-t', 'stdio',
    '-e', `BRIDGE_SERVER_URL=${ctx.serverUrl}`,
    '-e', `BRIDGE_TOKEN=${ctx.token}`,
    '-e', `BRIDGE_WORKSPACE_ID=${ctx.workspaceId}`,
    '-e', `BRIDGE_PROJECT_ID=${ctx.projectId}`,
    '-e', `BRIDGE_PANEL_ID=${ctx.agentId ?? ''}`,
    '-e', 'HTTP_MODE=false',
    'bridge', mcpBin,
  ], common);
  if (add.status === 0) {
    console.log('[daemon] qwen.mcp.setup.ok', { cwd: ctx.cwd, projectId: ctx.projectId });
    return true;
  }
  console.warn('[daemon] qwen.mcp.setup.failed', {
    cwd: ctx.cwd,
    projectId: ctx.projectId,
    status: add.status,
    // When the process could not be spawned or was killed (ENOENT, timeout),
    // status is null and stderr is empty; error/signal carry the cause.
    signal: add.signal,
    error: add.error?.message,
    stderr: (add.stderr ?? '').toString().slice(0, 300),
  });
  return false;
}
580
// Agents found by detectAgents(); replaced each time the socket (re)opens.
let cachedAgents = [];
// True only between the WebSocket's 'open' and 'close' events.
let _isConnected = false;
// Set by the first startDaemonConnection() call so a second call is rejected.
let _started = false;
/** Report whether the daemon's WebSocket to the server is currently open. */
export function isDaemonWsConnected() {
  return _isConnected;
}
584
/**
 * Open the daemon's WebSocket connection to the server and keep it alive.
 *
 * On every successful open it starts a ping heartbeat, announces itself with
 * a `ready` message, reports the detected agents and starts the metrics
 * relay. When the socket closes it reconnects after 3 seconds; two
 * consecutive 1008 closes are treated as an auth failure and exit the
 * process with code 1.
 *
 * Also installs process-level SIGINT, SIGTERM and uncaughtException
 * handlers, each of which kills all managed agents and exits.
 *
 * @param manager PTY manager. Only `killAll()` is called here; the object is
 *   otherwise passed through to handleMessage.
 * @throws {Error} When called more than once in the same process.
 */
export function startDaemonConnection(manager) {
  if (_started) {
    throw new Error('[daemon] startDaemonConnection called twice — only one connection manager allowed');
  }
  _started = true;
  const config = loadConfig();
  // Hex SHA-256 of the auth token, sent with every system_metrics message.
  const daemonId = createHash('sha256').update(config.token).digest('hex');
  let ws = null;
  let reconnectTimer = null;
  let heartbeatTimer = null;
  let consecutive1008 = 0; // exit only after 2 consecutive auth failures — avoids transient proxy 1008s

  /** Stop the ping interval, if one is running. */
  function stopHeartbeat() {
    if (heartbeatTimer) {
      clearInterval(heartbeatTimer);
      heartbeatTimer = null;
    }
  }

  /** Create a fresh socket and wire up its open/message/close/error handlers. */
  function connect() {
    if (reconnectTimer) {
      clearTimeout(reconnectTimer);
      reconnectTimer = null;
    }
    ws = new WebSocket(config.server, {
      headers: { Authorization: `Bearer ${config.token}` },
    });
    // Handlers close over this socket so a late event from an old connection
    // never acts on its replacement.
    const currentWs = ws;
    let stopMetrics = null;
    currentWs.on('open', () => {
      _isConnected = true;
      console.log('[daemon] ws.connected', { server: config.server });
      // Never overwrite a live interval handle, or it could no longer be cleared.
      stopHeartbeat();
      heartbeatTimer = setInterval(() => {
        if (currentWs.readyState === WebSocket.OPEN) {
          currentWs.ping();
        }
      }, KEEPALIVE_MS);
      currentWs.send(JSON.stringify({ type: 'ready', version: '1.1', name: config.name }));
      // Fire-and-forget. A rejection must be handled here: left unhandled it
      // would surface as an uncaughtException, whose handler below kills
      // every agent and exits.
      void detectAgents()
        .then((list) => {
          cachedAgents = list;
          if (currentWs.readyState === WebSocket.OPEN) {
            currentWs.send(JSON.stringify({ type: 'agents', list: cachedAgents }));
          }
        })
        .catch((err) => {
          console.warn('[daemon] agents.detect.failed', { error: String(err) });
        });
      stopMetrics = startMetricsRelay((metrics) => {
        if (currentWs.readyState === WebSocket.OPEN) {
          currentWs.send(JSON.stringify({ type: 'system_metrics', daemonId, ...metrics }));
        }
      });
    });
    currentWs.on('message', (raw) => {
      let msg;
      try {
        msg = JSON.parse(raw.toString());
      } catch {
        console.warn('[daemon] Invalid JSON from server, ignoring');
        return;
      }
      handleMessage(msg, currentWs, manager, config);
    });
    currentWs.on('close', (code) => {
      _isConnected = false;
      stopHeartbeat();
      stopMetrics?.();
      stopMetrics = null;
      if (code === 1008) {
        consecutive1008++;
        if (consecutive1008 >= 2) {
          console.error('[daemon] ws.auth_failed — token invalid or expired (2 consecutive rejections), stopping. Re-run: bridge-agent auth');
          process.exit(1);
        }
        console.warn('[daemon] ws.auth_rejected — transient 1008, will retry once', { attempt: consecutive1008 });
      } else {
        consecutive1008 = 0;
      }
      // A reconnect is already scheduled; do not stack a second one.
      if (reconnectTimer) {
        return;
      }
      console.log('[daemon] ws.reconnecting', { attempt: 1 });
      reconnectTimer = setTimeout(connect, 3000);
    });
    currentWs.on('error', (err) => {
      console.error('[daemon] ws.error', { message: err.message });
    });
  }

  /** Stop timers and watchers, kill every managed agent and close the socket. */
  function cleanShutdown() {
    stopHeartbeat();
    // Cancel a pending reconnect so shutdown cannot open a new socket.
    if (reconnectTimer) {
      clearTimeout(reconnectTimer);
      reconnectTimer = null;
    }
    stopQuotaWatcher();
    for (const stop of usageWatchers.values()) {
      stop();
    }
    manager.killAll();
    ws?.close();
  }

  process.on('SIGINT', () => { cleanShutdown(); process.exit(0); });
  process.on('SIGTERM', () => { cleanShutdown(); process.exit(0); });
  process.on('uncaughtException', (err) => {
    // Anything can be thrown, so do not assume an Error; keep the stack when
    // there is one, since this is the last log line before the process exits.
    console.error('[daemon] uncaughtException', { error: err?.message ?? String(err), stack: err?.stack });
    manager.killAll();
    process.exit(1);
  });
  connect();
}
683
/** Fallback terminal size used when a spawn request carries no usable cols/rows. */
const DEFAULT_PTY_COLS = 80;
const DEFAULT_PTY_ROWS = 24;
/** Largest terminal dimension (cols or rows) accepted from a spawn request. */
const MAX_PTY_DIMENSION = 500;

/**
 * Serialize `payload` as JSON and send it, but only while the socket is open.
 *
 * @param {object} ws - Socket exposing `readyState` and `send`.
 * @param {object} payload - JSON-serializable message.
 * @returns {boolean} `true` when the message was handed to the socket.
 */
function wsSendIfOpen(ws, payload) {
  if (ws.readyState !== WebSocket.OPEN) {
    return false;
  }
  ws.send(JSON.stringify(payload));
  return true;
}

/**
 * Clamp a requested terminal dimension into `[1, MAX_PTY_DIMENSION]`.
 *
 * @param {unknown} value - Requested size as received from the server.
 * @param {number} fallback - Size to use when `value` is not a number.
 * @returns {number} A usable dimension.
 */
function clampPtyDimension(value, fallback) {
  const requested = Number(value);
  // Math.min/Math.max propagate NaN, so a missing or non-numeric size would
  // otherwise be passed through to the spawn call as NaN.
  if (Number.isNaN(requested)) {
    return fallback;
  }
  return Math.max(1, Math.min(MAX_PTY_DIMENSION, requested));
}

/**
 * Replace any existing usage watcher for `agentId` with a fresh one that
 * forwards `panel_token_usage` messages (merged with the latest quota info).
 *
 * @param {string} agentId - Panel/agent identifier used as the map key.
 * @param {string} sessionId - Session the watcher should follow.
 * @param {object} ws - Socket the usage updates are sent on.
 * @returns {Function} The stop function stored in `usageWatchers`.
 */
function attachClaudeUsageWatcher(agentId, sessionId, ws) {
  usageWatchers.get(agentId)?.();
  const stop = startClaudeUsageWatcher(agentId, sessionId, (id, usedPct, usedTokens) => {
    wsSendIfOpen(ws, { type: 'panel_token_usage', agentId: id, usedPct, usedTokens, ...latestQuota });
  });
  usageWatchers.set(agentId, stop);
  return stop;
}

/**
 * Build the MCP spawn context and agent-specific MCP arguments.
 *
 * Nothing is configured unless the spawn message carries both `projectId`
 * and `workspaceId`.
 *
 * @param {object} msg - The `spawn` message.
 * @param {object} config - Daemon config; `server` and `token` are read.
 * @returns {{spawnCtx: (object|undefined), mcpConfigured: *, mcpTransport: (string|undefined), mcpArgs: string[]}}
 */
function prepareMcpSpawn(msg, config) {
  const setup = { spawnCtx: undefined, mcpConfigured: false, mcpTransport: undefined, mcpArgs: [] };
  if (!msg.projectId || !msg.workspaceId) {
    return setup;
  }
  const serverUrl = deriveServerUrl(config.server);
  const projectSettings = loadProjectSettings(msg.cwd);
  const spawnCtx = {
    serverUrl,
    token: config.token,
    workspaceId: msg.workspaceId,
    projectId: msg.projectId,
    agentId: msg.agentId,
    cwd: msg.cwd,
    projectEnv: projectSettings.env,
  };
  setup.spawnCtx = spawnCtx;
  // Agent-specific MCP wiring: different CLIs use different config surfaces.
  switch (msg.agentKey) {
    case 'claude':
      setup.mcpArgs = buildMcpConfigArgs(spawnCtx);
      setup.mcpConfigured = setup.mcpArgs.length > 0;
      setup.mcpTransport = process.env['BRIDGE_MCP_URL'] ? 'http' : 'stdio';
      break;
    case 'codex':
      setup.mcpArgs = buildCodexMcpConfigArgs(spawnCtx);
      setup.mcpConfigured = setup.mcpArgs.length > 0;
      setup.mcpTransport = 'stdio';
      break;
    case 'qwen':
      // Qwen is configured through its project file, not through CLI args.
      setup.mcpConfigured = ensureQwenProjectMcp(spawnCtx);
      setup.mcpTransport = setup.mcpConfigured ? 'stdio' : undefined;
      break;
    case 'kimi':
      setup.mcpArgs = buildKimiMcpConfigArgs(spawnCtx);
      setup.mcpConfigured = setup.mcpArgs.length > 0;
      setup.mcpTransport = process.env['BRIDGE_MCP_URL'] ? 'http' : 'stdio';
      break;
    default:
      console.log('[daemon] mcp.config.skipped', { agentId: msg.agentId, agentKey: msg.agentKey, reason: 'unsupported_agent_path' });
  }
  return setup;
}

/**
 * Handle a `spawn` message: resolve the agent, assemble its arguments,
 * start it through the manager and report the outcome to the server.
 *
 * @param {object} msg - The `spawn` message.
 * @param {object} ws - Socket used for all replies.
 * @param {object} manager - Provides `spawn` and `write`.
 * @param {object} config - Daemon config forwarded to the MCP setup.
 */
function spawnAgentPanel(msg, ws, manager, config) {
  console.log('[daemon] pty.spawn.start', { agentId: msg.agentId, agentKey: msg.agentKey, sessionId: msg.sessionId, projectId: msg.projectId, workspaceId: msg.workspaceId, role: msg.role });
  const agent = cachedAgents.find(a => a.key === msg.agentKey);
  if (!agent) {
    wsSendIfOpen(ws, {
      type: 'error',
      code: 'AGENT_NOT_FOUND',
      message: `Agent '${msg.agentKey}' is not installed on this machine`,
    });
    return;
  }
  const spec = AGENT_SPECS.find(s => s.key === msg.agentKey);
  let args = [];
  // Stop function of the usage watcher started for THIS spawn, if any.
  let stopUsageWatcher;
  if (msg.sessionId && spec?.resumeArgs) {
    // Resuming a specific session
    args = spec.resumeArgs(msg.sessionId);
    console.log('[daemon] pty.spawn.resume', { agentId: msg.agentId, sessionId: msg.sessionId });
    if (msg.agentKey === 'claude') {
      stopUsageWatcher = attachClaudeUsageWatcher(msg.agentId, msg.sessionId, ws);
    }
  }
  else if (spec?.assignSessionId) {
    // Fresh spawn for session-capable agent — assign stable UUID now
    const newSessionId = crypto.randomUUID();
    args = [...(spec.spawnArgs ?? []), '--session-id', newSessionId];
    if (wsSendIfOpen(ws, { type: 'session_started', agentId: msg.agentId, sessionId: newSessionId })) {
      console.log('[daemon] session.assigned', { agentId: msg.agentId, sessionId: newSessionId });
    }
    if (msg.agentKey === 'claude') {
      stopUsageWatcher = attachClaudeUsageWatcher(msg.agentId, newSessionId, ws);
    }
  }
  else {
    // Non-session agent (e.g. sh) — still notify browser so panel transitions to 'running'
    wsSendIfOpen(ws, { type: 'session_started', agentId: msg.agentId, sessionId: crypto.randomUUID() });
  }
  const { spawnCtx, mcpConfigured, mcpTransport, mcpArgs } = prepareMcpSpawn(msg, config);
  args = [...args, ...mcpArgs];
  // Inject role-specific system prompt regardless of MCP setup —
  // role identity must work even without project context
  const roleArgs = buildRolePromptArgs(msg.agentKey, msg.role, msg.agentId);
  if (roleArgs.length > 0) {
    args = [...args, ...roleArgs];
  }
  const clampedCols = clampPtyDimension(msg.cols, DEFAULT_PTY_COLS);
  const clampedRows = clampPtyDimension(msg.rows, DEFAULT_PTY_ROWS);
  const spawnStartedAt = Date.now();
  let firstOutputSnippet = '';
  let outputBytes = 0;
  let codexOnboardingAcked = false;
  const ok = manager.spawn(msg.agentId, msg.agentKey, agent.binaryPath, args, clampedCols, clampedRows, (data) => {
    outputBytes += data.length;
    // Only the first non-empty chunk is decoded; it feeds the diagnostic
    // snippet and the Codex onboarding check below.
    if (!firstOutputSnippet) {
      try {
        const decoded = Buffer.from(data, 'base64').toString('utf-8');
        const cleaned = stripAnsi(decoded).replace(/\x00/g, '').trim();
        if (cleaned) {
          firstOutputSnippet = clip(cleaned);
        }
        // Codex may block on a one-time onboarding yes/no prompt.
        // Auto-ack only for this specific startup banner pattern.
        if (msg.agentKey === 'codex' &&
          !codexOnboardingAcked &&
          (Date.now() - spawnStartedAt) < 20_000 &&
          /included in your plan for free|let[’']s build together/i.test(cleaned) &&
          /yes|no|\[y\/n\]|\(y\/n\)|y\/n/i.test(cleaned)) {
          codexOnboardingAcked = true;
          const yes = Buffer.from('y').toString('base64');
          manager.write(msg.agentId, yes, 'orchestrator');
          console.log('[daemon] codex.onboarding.auto_ack', { agentId: msg.agentId });
        }
      }
      catch {
        // ignore decode errors for logging
      }
    }
    wsSendIfOpen(ws, { type: 'output', agentId: msg.agentId, data });
  }, (exitCode, signal) => {
    const uptimeMs = Date.now() - spawnStartedAt;
    const earlyExit = uptimeMs <= EARLY_EXIT_MS;
    console.log('[daemon] pty.spawn.result', {
      agentId: msg.agentId,
      agentKey: msg.agentKey,
      daemonId: msg.daemonId,
      exitCode,
      signal,
      uptimeMs,
      earlyExit,
      outputBytes,
      firstOutputSnippet: firstOutputSnippet || undefined,
    });
    if (earlyExit) {
      wsSendIfOpen(ws, {
        type: 'error',
        code: 'SPAWN_FAILED',
        message: `Early exit: agent=${msg.agentKey} code=${exitCode ?? 'null'} signal=${signal ?? 'null'} snippet="${firstOutputSnippet || 'no output'}"`,
      });
    }
    wsSendIfOpen(ws, { type: 'exit', agentId: msg.agentId, exitCode, signal });
    // Release this spawn's usage watcher when the agent exits on its own.
    // The identity check leaves alone a watcher installed by a later spawn
    // that reused the same agentId.
    if (stopUsageWatcher && usageWatchers.get(msg.agentId) === stopUsageWatcher) {
      stopUsageWatcher();
      usageWatchers.delete(msg.agentId);
    }
  }, spawnCtx);
  if (!ok) {
    wsSendIfOpen(ws, {
      type: 'error',
      code: 'SPAWN_DUPLICATE',
      message: `Panel ${msg.agentId} is already running`,
    });
    return;
  }
  wsSendIfOpen(ws, {
    type: 'mcp_status',
    agentId: msg.agentId,
    mcpConfigured,
    transport: mcpConfigured ? mcpTransport : undefined,
    projectId: spawnCtx?.projectId,
  });
}

/**
 * Handle a `dir_list` message: list the non-hidden sub-directories of a
 * path that must resolve to the home directory or something beneath it.
 *
 * @param {object} msg - The `dir_list` message (`path`, `requestId`).
 * @param {object} ws - Socket used for the reply.
 */
function listHomeDirectories(msg, ws) {
  const requested = msg.path || '~';
  // A truthy non-string path would throw on .replace() below.
  if (typeof requested !== 'string') {
    wsSendIfOpen(ws, { type: 'error', code: 'INVALID_MSG', message: 'Path must be a string' });
    return;
  }
  const homeDir = os.homedir();
  // Expand leading ~ to home directory before resolving. A replacer function
  // keeps any `$` sequences in the home path literal.
  const expanded = requested.replace(/^~/, () => homeDir);
  const safePath = path.resolve(expanded);
  if (safePath !== homeDir && !safePath.startsWith(homeDir + path.sep)) {
    wsSendIfOpen(ws, { type: 'error', code: 'INVALID_MSG', message: 'Path outside home directory' });
    return;
  }
  try {
    const entries = fs.readdirSync(safePath, { withFileTypes: true })
      .filter(e => e.isDirectory() && !e.name.startsWith('.'))
      .map(e => ({ name: e.name, path: path.join(safePath, e.name) }))
      .sort((a, b) => a.name.localeCompare(b.name));
    wsSendIfOpen(ws, { type: 'dir_list_result', requestId: msg.requestId, path: safePath, entries });
  }
  catch (err) {
    wsSendIfOpen(ws, {
      type: 'dir_list_result',
      requestId: msg.requestId,
      path: safePath,
      entries: [],
      error: err instanceof Error ? err.message : 'Cannot read directory',
    });
  }
}

/**
 * Dispatch one parsed server message.
 *
 * Handled `msg.type` values: `spawn`, `input`, `kill`, `resize`,
 * `detect_agents` and `dir_list`. Any other type is ignored.
 *
 * @param {object} msg - Parsed message; `type` selects the handler.
 * @param {object} ws - Socket replies are sent on (only while it is open).
 * @param {object} manager - Provides `spawn`, `write`, `kill` and `resize`.
 * @param {object} config - Daemon config; used by `spawn` for MCP setup.
 */
function handleMessage(msg, ws, manager, config) {
  switch (msg.type) {
    case 'spawn':
      spawnAgentPanel(msg, ws, manager, config);
      break;
    case 'input':
      manager.write(msg.agentId, msg.data, msg.source);
      break;
    case 'kill':
      // Stop the usage watcher before the agent itself is killed.
      usageWatchers.get(msg.agentId)?.();
      usageWatchers.delete(msg.agentId);
      manager.kill(msg.agentId, msg.force);
      break;
    case 'resize':
      manager.resize(msg.agentId, msg.cols, msg.rows);
      break;
    case 'detect_agents':
      // Fire-and-forget, but with a rejection handler: an unhandled
      // rejection would otherwise terminate the daemon.
      detectAgents()
        .then((list) => {
          cachedAgents = list;
          wsSendIfOpen(ws, { type: 'agents', list });
        })
        .catch((err) => {
          console.error('[daemon] agents.detect.failed', { error: err instanceof Error ? err.message : String(err) });
        });
      break;
    case 'dir_list':
      listHomeDirectories(msg, ws);
      break;
    default:
      // Unknown message types are ignored.
      break;
  }
}